With memmap on memory, some architectures need more details w.r.t. the
altmap, such as base_pfn, end_pfn, etc., to unmap vmemmap memory. Instead
of computing them again when we remove a memory block, embed the
vmem_altmap details in struct memory_block if we are using the memmap on
memory feature.

Acked-by: Michal Hocko <mho...@suse.com>
Acked-by: David Hildenbrand <da...@redhat.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.ibm.com>
---
 drivers/base/memory.c  | 27 +++++++++++++--------
 include/linux/memory.h |  8 ++-----
 mm/memory_hotplug.c    | 54 ++++++++++++++++++++++++++----------------
 3 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index b456ac213610..8191709c9ad2 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -105,7 +105,8 @@ EXPORT_SYMBOL(unregister_memory_notifier);
 static void memory_block_release(struct device *dev)
 {
        struct memory_block *mem = to_memory_block(dev);
-
+       /* Verify that the altmap is freed */
+       WARN_ON(mem->altmap);
        kfree(mem);
 }
 
@@ -183,7 +184,7 @@ static int memory_block_online(struct memory_block *mem)
 {
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-       unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+       unsigned long nr_vmemmap_pages = 0;
        struct zone *zone;
        int ret;
 
@@ -200,6 +201,9 @@ static int memory_block_online(struct memory_block *mem)
         * stage helps to keep accounting easier to follow - e.g vmemmaps
         * belong to the same zone as the memory they backed.
         */
+       if (mem->altmap)
+               nr_vmemmap_pages = mem->altmap->free;
+
        if (nr_vmemmap_pages) {
                ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
                if (ret)
@@ -230,7 +234,7 @@ static int memory_block_offline(struct memory_block *mem)
 {
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-       unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+       unsigned long nr_vmemmap_pages = 0;
        int ret;
 
        if (!mem->zone)
@@ -240,6 +244,9 @@ static int memory_block_offline(struct memory_block *mem)
         * Unaccount before offlining, such that unpopulated zone and kthreads
         * can properly be torn down in offline_pages().
         */
+       if (mem->altmap)
+               nr_vmemmap_pages = mem->altmap->free;
+
        if (nr_vmemmap_pages)
                adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
                                          -nr_vmemmap_pages);
@@ -726,7 +733,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
 #endif
 
 static int add_memory_block(unsigned long block_id, unsigned long state,
-                           unsigned long nr_vmemmap_pages,
+                           struct vmem_altmap *altmap,
                            struct memory_group *group)
 {
        struct memory_block *mem;
@@ -744,7 +751,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
        mem->start_section_nr = block_id * sections_per_block;
        mem->state = state;
        mem->nid = NUMA_NO_NODE;
-       mem->nr_vmemmap_pages = nr_vmemmap_pages;
+       mem->altmap = altmap;
        INIT_LIST_HEAD(&mem->group_next);
 
 #ifndef CONFIG_NUMA
@@ -783,14 +790,14 @@ static int __init add_boot_memory_block(unsigned long base_section_nr)
        if (section_count == 0)
                return 0;
        return add_memory_block(memory_block_id(base_section_nr),
-                               MEM_ONLINE, 0,  NULL);
+                               MEM_ONLINE, NULL,  NULL);
 }
 
 static int add_hotplug_memory_block(unsigned long block_id,
-                                   unsigned long nr_vmemmap_pages,
+                                   struct vmem_altmap *altmap,
                                    struct memory_group *group)
 {
-       return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group);
+       return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
 }
 
 static void remove_memory_block(struct memory_block *memory)
@@ -818,7 +825,7 @@ static void remove_memory_block(struct memory_block *memory)
  * Called under device_hotplug_lock.
  */
 int create_memory_block_devices(unsigned long start, unsigned long size,
-                               unsigned long vmemmap_pages,
+                               struct vmem_altmap *altmap,
                                struct memory_group *group)
 {
        const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
@@ -832,7 +839,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
                return -EINVAL;
 
        for (block_id = start_block_id; block_id != end_block_id; block_id++) {
-               ret = add_hotplug_memory_block(block_id, vmemmap_pages, group);
+               ret = add_hotplug_memory_block(block_id, altmap, group);
                if (ret)
                        break;
        }
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 31343566c221..f53cfdaaaa41 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -77,11 +77,7 @@ struct memory_block {
         */
        struct zone *zone;
        struct device dev;
-       /*
-        * Number of vmemmap pages. These pages
-        * lay at the beginning of the memory block.
-        */
-       unsigned long nr_vmemmap_pages;
+       struct vmem_altmap *altmap;
        struct memory_group *group;     /* group (if any) for this block */
        struct list_head group_next;    /* next block inside memory group */
 #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
@@ -147,7 +143,7 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size,
-                               unsigned long vmemmap_pages,
+                               struct vmem_altmap *altmap,
                                struct memory_group *group);
 void remove_memory_block_devices(unsigned long start, unsigned long size);
 extern void memory_dev_init(void);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 76b813991bdc..f8d3e7427e32 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1439,7 +1439,11 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
        if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
                if (mhp_supports_memmap_on_memory(size)) {
                        mhp_altmap.free = memory_block_memmap_on_memory_pages();
-                       params.altmap = &mhp_altmap;
+                       params.altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL);
+                       if (!params.altmap)
+                               goto error;
+
+                       memcpy(params.altmap, &mhp_altmap, sizeof(mhp_altmap));
                }
                /* fallback to not using altmap  */
        }
@@ -1447,13 +1451,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
        /* call arch's memory hotadd */
        ret = arch_add_memory(nid, start, size, &params);
        if (ret < 0)
-               goto error;
+               goto error_free;
 
        /* create memory block devices after memory was added */
-       ret = create_memory_block_devices(start, size, mhp_altmap.free, group);
+       ret = create_memory_block_devices(start, size, params.altmap, group);
        if (ret) {
                arch_remove_memory(start, size, NULL);
-               goto error;
+               goto error_free;
        }
 
        if (new_node) {
@@ -1490,6 +1494,8 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
                walk_memory_blocks(start, size, NULL, online_memory_block);
 
        return ret;
+error_free:
+       kfree(params.altmap);
 error:
        if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
                memblock_remove(start, size);
@@ -2056,12 +2062,18 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
        return 0;
 }
 
-static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
+static int test_has_altmap_cb(struct memory_block *mem, void *arg)
 {
+       struct memory_block **mem_ptr = (struct memory_block **)arg;
        /*
-        * If not set, continue with the next block.
+        * return the memblock if we have altmap
+        * and break callback.
         */
-       return mem->nr_vmemmap_pages;
+       if (mem->altmap) {
+               *mem_ptr = mem;
+               return 1;
+       }
+       return 0;
 }
 
 static int check_cpu_on_node(int nid)
@@ -2136,10 +2148,9 @@ EXPORT_SYMBOL(try_offline_node);
 
 static int __ref try_remove_memory(u64 start, u64 size)
 {
-       struct vmem_altmap mhp_altmap = {};
-       struct vmem_altmap *altmap = NULL;
-       unsigned long nr_vmemmap_pages;
+       struct memory_block *mem;
        int rc = 0, nid = NUMA_NO_NODE;
+       struct vmem_altmap *altmap = NULL;
 
        BUG_ON(check_hotplug_memory_range(start, size));
 
@@ -2161,25 +2172,20 @@ static int __ref try_remove_memory(u64 start, u64 size)
         * the same granularity it was added - a single memory block.
         */
        if (mhp_memmap_on_memory()) {
-               nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
-                                                     get_nr_vmemmap_pages_cb);
-               if (nr_vmemmap_pages) {
+               rc = walk_memory_blocks(start, size, &mem, test_has_altmap_cb);
+               if (rc) {
                        if (size != memory_block_size_bytes()) {
                                pr_warn("Refuse to remove %#llx - %#llx,"
                                        "wrong granularity\n",
                                        start, start + size);
                                return -EINVAL;
                        }
-
+                       altmap = mem->altmap;
                        /*
-                        * Let remove_pmd_table->free_hugepage_table do the
-                        * right thing if we used vmem_altmap when hot-adding
-                        * the range.
+                        * Mark altmap NULL so that we can add a debug
+                        * check on memblock free.
                         */
-                       mhp_altmap.base_pfn = PHYS_PFN(start);
-                       mhp_altmap.free = nr_vmemmap_pages;
-                       mhp_altmap.alloc = nr_vmemmap_pages;
-                       altmap = &mhp_altmap;
+                       mem->altmap = NULL;
                }
        }
 
@@ -2196,6 +2202,12 @@ static int __ref try_remove_memory(u64 start, u64 size)
 
        arch_remove_memory(start, size, altmap);
 
+       /* Verify that all vmemmap pages have actually been freed. */
+       if (altmap) {
+               WARN(altmap->alloc, "Altmap not fully unmapped");
+               kfree(altmap);
+       }
+
        if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
                memblock_phys_free(start, size);
                memblock_remove(start, size);
-- 
2.41.0

Reply via email to