'struct mem_section_usage' combines the existing 'pageblock_flags' bitmap
with a new 'map_active' bitmap.  The new bitmap enables the memory
hot{plug,remove} implementation to act on incremental sub-divisions of
a section. The primary impetus for this functionality is to support
platforms that mix "System RAM" and "Persistent Memory" within a single
section.  We want to be able to hotplug "Persistent Memory" to extend a
partially populated section and share that section between ZONE_DEVICE and
ZONE_NORMAL/MOVABLE memory.

This replaces the 'pageblock_flags' pointer in struct mem_section with a
'usage' pointer to the new combined structure (which embeds the new
'map_active' bitmap), but otherwise should not change any behavior.

Cc: Michal Hocko <mho...@suse.com>
Cc: Vlastimil Babka <vba...@suse.cz>
Cc: Johannes Weiner <han...@cmpxchg.org>
Cc: Logan Gunthorpe <log...@deltatee.com>
Cc: Mel Gorman <mgor...@techsingularity.net>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Stephen Bates <stephen.ba...@microsemi.com>
Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
---
 include/linux/mmzone.h |   21 +++++++++-
 mm/memory_hotplug.c    |    4 +-
 mm/page_alloc.c        |    2 -
 mm/sparse.c            |   98 ++++++++++++++++++++++++++----------------------
 4 files changed, 75 insertions(+), 50 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ffa9503d5be5..82a1af3afa04 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1070,6 +1070,19 @@ static inline unsigned long early_pfn_to_nid(unsigned 
long pfn)
 #define SECTION_ALIGN_UP(pfn)  (((pfn) + PAGES_PER_SECTION - 1) & 
PAGE_SECTION_MASK)
 #define SECTION_ALIGN_DOWN(pfn)        ((pfn) & PAGE_SECTION_MASK)
 
+#define SECTION_ACTIVE_SIZE ((1UL << SECTION_SIZE_BITS) / BITS_PER_LONG)
+#define SECTION_ACTIVE_MASK (~(SECTION_ACTIVE_SIZE - 1))
+
+struct mem_section_usage {
+       /*
+        * SECTION_ACTIVE_SIZE portions of the section that are populated in
+        * the memmap
+        */
+       unsigned long map_active;
+       /* See declaration of similar field in struct zone */
+       unsigned long pageblock_flags[0];
+};
+
 struct page;
 struct page_ext;
 struct mem_section {
@@ -1087,8 +1100,7 @@ struct mem_section {
         */
        unsigned long section_mem_map;
 
-       /* See declaration of similar field in struct zone */
-       unsigned long *pageblock_flags;
+       struct mem_section_usage *usage;
 #ifdef CONFIG_PAGE_EXTENSION
        /*
         * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
@@ -1119,6 +1131,11 @@ extern struct mem_section *mem_section[NR_SECTION_ROOTS];
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
+static inline unsigned long *section_to_usemap(struct mem_section *ms)
+{
+       return ms->usage->pageblock_flags;
+}
+
 static inline struct mem_section *__nr_to_section(unsigned long nr)
 {
        if (!mem_section[SECTION_NR_TO_ROOT(nr)])
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e1751ca002fa..07accab8441d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -235,7 +235,7 @@ static void register_page_bootmem_info_section(unsigned 
long start_pfn)
        for (i = 0; i < mapsize; i++, page++)
                get_page_bootmem(section_nr, page, SECTION_INFO);
 
-       usemap = __nr_to_section(section_nr)->pageblock_flags;
+       usemap = section_to_usemap(__nr_to_section(section_nr));
        page = virt_to_page(usemap);
 
        mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
@@ -261,7 +261,7 @@ static void register_page_bootmem_info_section(unsigned 
long start_pfn)
 
        register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
 
-       usemap = __nr_to_section(section_nr)->pageblock_flags;
+       usemap = section_to_usemap(__nr_to_section(section_nr));
        page = virt_to_page(usemap);
 
        mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6cbde310abed..50858eef1cc4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -357,7 +357,7 @@ static inline unsigned long *get_pageblock_bitmap(struct 
page *page,
                                                        unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
-       return __pfn_to_section(pfn)->pageblock_flags;
+       return section_to_usemap(__pfn_to_section(pfn));
 #else
        return page_zone(page)->pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
diff --git a/mm/sparse.c b/mm/sparse.c
index db6bf3c97ea2..d0d4c005dc60 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -233,15 +233,15 @@ struct page *sparse_decode_mem_map(unsigned long 
coded_mem_map, unsigned long pn
 
 static int __meminit sparse_init_one_section(struct mem_section *ms,
                unsigned long pnum, struct page *mem_map,
-               unsigned long *pageblock_bitmap)
+               struct mem_section_usage *usage)
 {
        if (!present_section(ms))
                return -EINVAL;
 
        ms->section_mem_map &= ~SECTION_MAP_MASK;
        ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
-                                                       SECTION_HAS_MEM_MAP;
-       ms->pageblock_flags = pageblock_bitmap;
+               SECTION_HAS_MEM_MAP;
+       ms->usage = usage;
 
        return 1;
 }
@@ -255,9 +255,13 @@ unsigned long usemap_size(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static unsigned long *__kmalloc_section_usemap(void)
+static struct mem_section_usage *__alloc_section_usage(void)
 {
-       return kmalloc(usemap_size(), GFP_KERNEL);
+       struct mem_section_usage *usage;
+
+       usage = kzalloc(sizeof(*usage) + usemap_size(), GFP_KERNEL);
+       /* TODO: allocate the map_active bitmap */
+       return usage;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
@@ -293,7 +297,8 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data 
*pgdat,
        return p;
 }
 
-static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+static void __init check_usemap_section_nr(int nid,
+               struct mem_section_usage *usage)
 {
        unsigned long usemap_snr, pgdat_snr;
        static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
@@ -301,7 +306,7 @@ static void __init check_usemap_section_nr(int nid, 
unsigned long *usemap)
        struct pglist_data *pgdat = NODE_DATA(nid);
        int usemap_nid;
 
-       usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
+       usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
        pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
        if (usemap_snr == pgdat_snr)
                return;
@@ -336,7 +341,8 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data 
*pgdat,
        return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
 }
 
-static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+static void __init check_usemap_section_nr(int nid,
+               struct mem_section_usage *usage)
 {
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
@@ -344,26 +350,27 @@ static void __init check_usemap_section_nr(int nid, 
unsigned long *usemap)
 static void __init sparse_early_usemaps_alloc_node(void *data,
                                 unsigned long pnum_begin,
                                 unsigned long pnum_end,
-                                unsigned long usemap_count, int nodeid)
+                                unsigned long usage_count, int nodeid)
 {
-       void *usemap;
+       void *usage;
        unsigned long pnum;
-       unsigned long **usemap_map = (unsigned long **)data;
-       int size = usemap_size();
+       struct mem_section_usage **usage_map = data;
+       int size = sizeof(struct mem_section_usage) + usemap_size();
 
-       usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
-                                                         size * usemap_count);
-       if (!usemap) {
+       usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
+                                                         size * usage_count);
+       if (!usage) {
                pr_warn("%s: allocation failed\n", __func__);
                return;
        }
 
+       memset(usage, 0, size * usage_count);
        for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
                if (!present_section_nr(pnum))
                        continue;
-               usemap_map[pnum] = usemap;
-               usemap += size;
-               check_usemap_section_nr(nodeid, usemap_map[pnum]);
+               usage_map[pnum] = usage;
+               usage += size;
+               check_usemap_section_nr(nodeid, usage_map[pnum]);
        }
 }
 
@@ -468,7 +475,7 @@ void __weak __meminit vmemmap_populate_print_last(void)
 
 /**
  *  alloc_usemap_and_memmap - memory alloction for pageblock flags and vmemmap
- *  @map: usemap_map for pageblock flags or mmap_map for vmemmap
+ *  @map: usage_map for mem_section_usage or mmap_map for vmemmap
  */
 static void __init alloc_usemap_and_memmap(void (*alloc_func)
                                        (void *, unsigned long, unsigned long,
@@ -521,10 +528,9 @@ static void __init alloc_usemap_and_memmap(void 
(*alloc_func)
  */
 void __init sparse_init(void)
 {
+       struct mem_section_usage *usage, **usage_map;
        unsigned long pnum;
        struct page *map;
-       unsigned long *usemap;
-       unsigned long **usemap_map;
        int size;
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
        int size2;
@@ -539,21 +545,21 @@ void __init sparse_init(void)
 
        /*
         * map is using big page (aka 2M in x86 64 bit)
-        * usemap is less one page (aka 24 bytes)
+        * usage is less one page (aka 24 bytes)
         * so alloc 2M (with 2M align) and 24 bytes in turn will
         * make next 2M slip to one more 2M later.
         * then in big system, the memory will have a lot of holes...
         * here try to allocate 2M pages continuously.
         *
         * powerpc need to call sparse_init_one_section right after each
-        * sparse_early_mem_map_alloc, so allocate usemap_map at first.
+        * sparse_early_mem_map_alloc, so allocate usage_map at first.
         */
-       size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
-       usemap_map = memblock_virt_alloc(size, 0);
-       if (!usemap_map)
-               panic("can not allocate usemap_map\n");
+       size = sizeof(struct mem_section_usage *) * NR_MEM_SECTIONS;
+       usage_map = memblock_virt_alloc(size, 0);
+       if (!usage_map)
+               panic("can not allocate usage_map\n");
        alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
-                                                       (void *)usemap_map);
+                                                       (void *)usage_map);
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
        size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
@@ -568,8 +574,8 @@ void __init sparse_init(void)
                if (!present_section_nr(pnum))
                        continue;
 
-               usemap = usemap_map[pnum];
-               if (!usemap)
+               usage = usage_map[pnum];
+               if (!usage)
                        continue;
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
@@ -581,7 +587,7 @@ void __init sparse_init(void)
                        continue;
 
                sparse_init_one_section(__nr_to_section(pnum), pnum, map,
-                                                               usemap);
+                                                               usage);
        }
 
        vmemmap_populate_print_last();
@@ -589,7 +595,7 @@ void __init sparse_init(void)
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
        memblock_free_early(__pa(map_map), size2);
 #endif
-       memblock_free_early(__pa(usemap_map), size);
+       memblock_free_early(__pa(usage_map), size);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -693,9 +699,9 @@ int __meminit sparse_add_one_section(struct zone *zone, 
unsigned long start_pfn)
 {
        unsigned long section_nr = pfn_to_section_nr(start_pfn);
        struct pglist_data *pgdat = zone->zone_pgdat;
+       struct mem_section_usage *usage;
        struct mem_section *ms;
        struct page *memmap;
-       unsigned long *usemap;
        unsigned long flags;
        int ret;
 
@@ -709,8 +715,8 @@ int __meminit sparse_add_one_section(struct zone *zone, 
unsigned long start_pfn)
        memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
        if (!memmap)
                return -ENOMEM;
-       usemap = __kmalloc_section_usemap();
-       if (!usemap) {
+       usage = __alloc_section_usage();
+       if (!usage) {
                __kfree_section_memmap(memmap);
                return -ENOMEM;
        }
@@ -727,12 +733,12 @@ int __meminit sparse_add_one_section(struct zone *zone, 
unsigned long start_pfn)
 
        ms->section_mem_map |= SECTION_MARKED_PRESENT;
 
-       ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
+       ret = sparse_init_one_section(ms, section_nr, memmap, usage);
 
 out:
        pgdat_resize_unlock(pgdat, &flags);
        if (ret <= 0) {
-               kfree(usemap);
+               kfree(usage);
                __kfree_section_memmap(memmap);
        }
        return ret;
@@ -760,19 +766,20 @@ static inline void clear_hwpoisoned_pages(struct page 
*memmap, int nr_pages)
 }
 #endif
 
-static void free_section_usemap(struct page *memmap, unsigned long *usemap)
+static void free_section_usage(struct page *memmap,
+               struct mem_section_usage *usage)
 {
        struct page *usemap_page;
 
-       if (!usemap)
+       if (!usage)
                return;
 
-       usemap_page = virt_to_page(usemap);
+       usemap_page = virt_to_page(usage->pageblock_flags);
        /*
         * Check to see if allocation came from hot-plug-add
         */
        if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
-               kfree(usemap);
+               kfree(usage);
                if (memmap)
                        __kfree_section_memmap(memmap);
                return;
@@ -790,23 +797,24 @@ static void free_section_usemap(struct page *memmap, 
unsigned long *usemap)
 void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
                unsigned long map_offset)
 {
+       unsigned long flags;
        struct page *memmap = NULL;
-       unsigned long *usemap = NULL, flags;
+       struct mem_section_usage *usage = NULL;
        struct pglist_data *pgdat = zone->zone_pgdat;
 
        pgdat_resize_lock(pgdat, &flags);
        if (ms->section_mem_map) {
-               usemap = ms->pageblock_flags;
+               usage = ms->usage;
                memmap = sparse_decode_mem_map(ms->section_mem_map,
                                                __section_nr(ms));
                ms->section_mem_map = 0;
-               ms->pageblock_flags = NULL;
+               ms->usage = NULL;
        }
        pgdat_resize_unlock(pgdat, &flags);
 
        clear_hwpoisoned_pages(memmap + map_offset,
                        PAGES_PER_SECTION - map_offset);
-       free_section_usemap(memmap, usemap);
+       free_section_usage(memmap, usage);
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif /* CONFIG_MEMORY_HOTPLUG */

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

Reply via email to