Let's make sure splitting a resource on memory hotunplug will never fail.
This will become more relevant once we merge selected System RAM
resources - then, we'll trigger that case more frequently on memory
hotunplug.

In general, this function is already unlikely to fail. When we remove
memory, we free up quite a lot of metadata (memmap, page tables, memory
block device, etc.). The only way it could really fail currently is when
injecting allocation errors.

All other error cases inside release_mem_region_adjustable() seem to be
sanity checks for the case that the function is misused in a different
context - let's add WARN_ON_ONCE() to them.

Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Michal Hocko <mho...@suse.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Jason Gunthorpe <j...@ziepe.ca>
Cc: Kees Cook <keesc...@chromium.org>
Cc: Ard Biesheuvel <a...@kernel.org>
Cc: Pankaj Gupta <pankaj.gupta.li...@gmail.com>
Cc: Baoquan He <b...@redhat.com>
Cc: Wei Yang <richardw.y...@linux.intel.com>
Signed-off-by: David Hildenbrand <da...@redhat.com>
---
 include/linux/ioport.h |  4 ++--
 kernel/resource.c      | 49 ++++++++++++++++++++++++------------------
 mm/memory_hotplug.c    | 22 +------------------
 3 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6c2b06fe8beb7..52a91f5fa1a36 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -248,8 +248,8 @@ extern struct resource * __request_region(struct resource *,
 extern void __release_region(struct resource *, resource_size_t,
                                resource_size_t);
 #ifdef CONFIG_MEMORY_HOTREMOVE
-extern int release_mem_region_adjustable(struct resource *, resource_size_t,
-                               resource_size_t);
+extern void release_mem_region_adjustable(struct resource *, resource_size_t,
+                                         resource_size_t);
 #endif
 
 /* Wrappers for managed devices */
diff --git a/kernel/resource.c b/kernel/resource.c
index 841737bbda9e5..1dcef5d53d76e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1255,21 +1255,28 @@ EXPORT_SYMBOL(__release_region);
  *   assumes that all children remain in the lower address entry for
  *   simplicity.  Enhance this logic when necessary.
  */
-int release_mem_region_adjustable(struct resource *parent,
-                                 resource_size_t start, resource_size_t size)
+void release_mem_region_adjustable(struct resource *parent,
+                                  resource_size_t start, resource_size_t size)
 {
+       struct resource *new_res = NULL;
+       bool alloc_nofail = false;
        struct resource **p;
        struct resource *res;
-       struct resource *new_res;
        resource_size_t end;
-       int ret = -EINVAL;
 
        end = start + size - 1;
-       if ((start < parent->start) || (end > parent->end))
-               return ret;
+       if (WARN_ON_ONCE((start < parent->start) || (end > parent->end)))
+               return;
 
-       /* The alloc_resource() result gets checked later */
-       new_res = alloc_resource(GFP_KERNEL);
+       /*
+        * We free up quite a lot of memory on memory hotunplug (esp., memmap),
+        * just before releasing the region. This is highly unlikely to
+        * fail - let's play safe and make it never fail as the caller cannot
+        * perform any error handling (e.g., trying to re-add memory will fail
+        * similarly).
+        */
+retry:
+       new_res = alloc_resource(GFP_KERNEL | alloc_nofail ? __GFP_NOFAIL : 0);
 
        p = &parent->child;
        write_lock(&resource_lock);
@@ -1295,7 +1302,6 @@ int release_mem_region_adjustable(struct resource *parent,
                 * so if we are dealing with them, let us just back off here.
                 */
                if (!(res->flags & IORESOURCE_SYSRAM)) {
-                       ret = 0;
                        break;
                }
 
@@ -1312,20 +1318,23 @@ int release_mem_region_adjustable(struct resource *parent,
                        /* free the whole entry */
                        *p = res->sibling;
                        free_resource(res);
-                       ret = 0;
                } else if (res->start == start && res->end != end) {
                        /* adjust the start */
-                       ret = __adjust_resource(res, end + 1,
-                                               res->end - end);
+                       WARN_ON_ONCE(__adjust_resource(res, end + 1,
+                                                      res->end - end));
                } else if (res->start != start && res->end == end) {
                        /* adjust the end */
-                       ret = __adjust_resource(res, res->start,
-                                               start - res->start);
+                       WARN_ON_ONCE(__adjust_resource(res, res->start,
+                                                      start - res->start));
                } else {
-                       /* split into two entries */
+                       /* split into two entries - we need a new resource */
                        if (!new_res) {
-                               ret = -ENOMEM;
-                               break;
+                               new_res = alloc_resource(GFP_ATOMIC);
+                               if (!new_res) {
+                                       alloc_nofail = true;
+                                       write_unlock(&resource_lock);
+                                       goto retry;
+                               }
                        }
                        new_res->name = res->name;
                        new_res->start = end + 1;
@@ -1336,9 +1345,8 @@ int release_mem_region_adjustable(struct resource *parent,
                        new_res->sibling = res->sibling;
                        new_res->child = NULL;
 
-                       ret = __adjust_resource(res, res->start,
-                                               start - res->start);
-                       if (ret)
+                       if (WARN_ON_ONCE(__adjust_resource(res, res->start,
+                                                          start - res->start)))
                                break;
                        res->sibling = new_res;
                        new_res = NULL;
@@ -1349,7 +1357,6 @@ int release_mem_region_adjustable(struct resource *parent,
 
        write_unlock(&resource_lock);
        free_resource(new_res);
-       return ret;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 35d56cbd3e45b..f77fd1810e9f5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1700,26 +1700,6 @@ void try_offline_node(int nid)
 }
 EXPORT_SYMBOL(try_offline_node);
 
-static void __release_memory_resource(resource_size_t start,
-                                     resource_size_t size)
-{
-       int ret;
-
-       /*
-        * When removing memory in the same granularity as it was added,
-        * this function never fails. It might only fail if resources
-        * have to be adjusted or split. We'll ignore the error, as
-        * removing of memory cannot fail.
-        */
-       ret = release_mem_region_adjustable(&iomem_resource, start, size);
-       if (ret) {
-               resource_size_t endres = start + size - 1;
-
-               pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
-                       &start, &endres, ret);
-       }
-}
-
 static int __ref try_remove_memory(int nid, u64 start, u64 size)
 {
        int rc = 0;
@@ -1753,7 +1733,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
                memblock_remove(start, size);
        }
 
-       __release_memory_resource(start, size);
+       release_mem_region_adjustable(&iomem_resource, start, size);
 
        try_offline_node(nid);
 
-- 
2.26.2

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to