virtio-mem wants to allow to offline memory blocks of which some parts
were unplugged, especially, to later offline and remove completely
unplugged memory blocks. The important part is that PageOffline() has
to remain set until the section is offline, so these pages will never
get accessed (e.g., when dumping). The pages should not be handed
back to the buddy (which would require clearing PageOffline() and
result in issues if offlining fails and the pages are suddenly in the
buddy).

Let's use "PageOffline() + reference count = 0" as a sign to
memory offlining code that these pages can simply be skipped when
offlining, similar to free or HWPoison pages.

Pass flags to test_pages_isolated(), similar as already done for
has_unmovable_pages(). Use a new flag to indicate the
requirement of memory offlining to skip over these special pages.

In has_unmovable_pages(), make sure the pages won't be detected as
movable. This is not strictly necessary, however makes e.g.,
alloc_contig_range() stop early, trying to isolate such page blocks -
compared to failing later when testing if all pages were isolated.

Also, make sure that when a reference to a PageOffline() page is
dropped, that the page will not be returned to the buddy.

memory devices (like virtio-mem) that want to make use of this
functionality have to make sure to synchronize against memory offlining,
using the memory hotplug notifier.

Alternative: Allow to offline with a reference count of 1
and use some other sign in the struct page that offlining is permitted.

Cc: Andrew Morton <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Pavel Tatashin <[email protected]>
Cc: Alexander Duyck <[email protected]>
Cc: Anthony Yznaga <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Oscar Salvador <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Pingfan Liu <[email protected]>
Cc: Qian Cai <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Wei Yang <[email protected]>
Cc: Alexander Potapenko <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Stephen Rothwell <[email protected]>
Cc: Mauro Carvalho Chehab <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Yu Zhao <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Yang Shi <[email protected]>
Cc: Ira Weiny <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Signed-off-by: David Hildenbrand <[email protected]>
---
 include/linux/page-flags.h     |  4 ++++
 include/linux/page-isolation.h |  4 +++-
 mm/memory_hotplug.c            |  9 ++++++---
 mm/page_alloc.c                | 22 +++++++++++++++++++++-
 mm/page_isolation.c            | 18 +++++++++++++-----
 mm/swap.c                      |  9 +++++++++
 6 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f91cb8898ff0..7e563eab6b4b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -745,6 +745,10 @@ PAGE_TYPE_OPS(Buddy, buddy)
  * not onlined when onlining the section).
  * The content of these pages is effectively stale. Such pages should not
  * be touched (read/write/dump/save) except by their owner.
+ *
+ * PageOffline() pages that have a reference count of 0 will be treated
+ * like free pages when offlining pages, allowing the containing memory
+ * block to get offlined.
  */
 PAGE_TYPE_OPS(Offline, offline)
 
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 1099c2fee20f..024e02b60346 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -32,6 +32,8 @@ static inline bool is_migrate_isolate(int migratetype)
 
 #define SKIP_HWPOISON  0x1
 #define REPORT_FAILURE 0x2
+/* Skip PageOffline() pages with a reference count of 0. */
+#define SKIP_OFFLINE   0x4
 
 bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                         int migratetype, int flags);
@@ -58,7 +60,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned 
long end_pfn,
  * Test all pages in [start_pfn, end_pfn) are isolated or not.
  */
 int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
-                       bool skip_hwpoisoned_pages);
+                       int flags);
 
 struct page *alloc_migrate_target(struct page *page, unsigned long private);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f08eb429b8f3..d23ff7c5c96b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1127,7 +1127,8 @@ static bool is_pageblock_removable_nolock(unsigned long 
pfn)
        if (!zone_spans_pfn(zone, pfn))
                return false;
 
-       return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, 
SKIP_HWPOISON);
+       return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE,
+                                   SKIP_HWPOISON | SKIP_OFFLINE);
 }
 
 /* Checks if this range of memory is likely to be hot-removable. */
@@ -1344,7 +1345,8 @@ static int
 check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
                        void *data)
 {
-       return test_pages_isolated(start_pfn, start_pfn + nr_pages, true);
+       return test_pages_isolated(start_pfn, start_pfn + nr_pages,
+                                  SKIP_HWPOISON | SKIP_OFFLINE);
 }
 
 static int __init cmdline_parse_movable_node(char *p)
@@ -1455,7 +1457,8 @@ static int __ref __offline_pages(unsigned long start_pfn,
        /* set above range as isolated */
        ret = start_isolate_page_range(start_pfn, end_pfn,
                                       MIGRATE_MOVABLE,
-                                      SKIP_HWPOISON | REPORT_FAILURE);
+                                      SKIP_HWPOISON | REPORT_FAILURE |
+                                      SKIP_OFFLINE);
        if (ret < 0) {
                reason = "failure to isolate range";
                goto failed_removal;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d5d7944954b3..fef74720d8b4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8221,6 +8221,15 @@ bool has_unmovable_pages(struct zone *zone, struct page 
*page, int count,
                if (!page_ref_count(page)) {
                        if (PageBuddy(page))
                                iter += (1 << page_order(page)) - 1;
+                       /*
+                       * Memory devices allow to offline a page if it is
+                       * marked PG_offline and has a reference count of 0.
+                       * However, the pages are not movable as it would be
+                       * required e.g., for alloc_contig_range().
+                       */
+                       if (PageOffline(page) && !(flags & SKIP_OFFLINE))
+                               if (++found > count)
+                                       goto unmovable;
                        continue;
                }
 
@@ -8444,7 +8453,7 @@ int alloc_contig_range(unsigned long start, unsigned long 
end,
        }
 
        /* Make sure the range is really isolated. */
-       if (test_pages_isolated(outer_start, end, false)) {
+       if (test_pages_isolated(outer_start, end, 0)) {
                pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
                        __func__, outer_start, end);
                ret = -EBUSY;
@@ -8563,6 +8572,17 @@ __offline_isolated_pages(unsigned long start_pfn, 
unsigned long end_pfn)
                        offlined_pages++;
                        continue;
                }
+               /*
+                * Memory devices allow to offline a page if it is marked
+                * PG_offline and has a reference count of 0.
+                */
+               if (PageOffline(page) && !page_count(page)) {
+                       BUG_ON(PageBuddy(page));
+                       pfn++;
+                       SetPageReserved(page);
+                       offlined_pages++;
+                       continue;
+               }
 
                BUG_ON(page_count(page));
                BUG_ON(!PageBuddy(page));
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 89c19c0feadb..0a75019d7e7c 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -171,6 +171,8 @@ __first_valid_page(unsigned long pfn, unsigned long 
nr_pages)
  *                     SKIP_HWPOISON - ignore hwpoison pages
  *                     REPORT_FAILURE - report details about the failure to
  *                     isolate the range
+ *                     SKIP_OFFLINE - ignore PageOffline() pages with a
+ *                     reference count of 0
  *
  * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
  * the range will never be allocated. Any free pages and pages freed in the
@@ -257,7 +259,7 @@ void undo_isolate_page_range(unsigned long start_pfn, 
unsigned long end_pfn,
  */
 static unsigned long
 __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
-                                 bool skip_hwpoisoned_pages)
+                                 int flags)
 {
        struct page *page;
 
@@ -274,9 +276,16 @@ __test_page_isolated_in_pageblock(unsigned long pfn, 
unsigned long end_pfn,
                         * simple way to verify that as VM_BUG_ON(), though.
                         */
                        pfn += 1 << page_order(page);
-               else if (skip_hwpoisoned_pages && PageHWPoison(page))
+               else if ((flags & SKIP_HWPOISON) && PageHWPoison(page))
                        /* A HWPoisoned page cannot be also PageBuddy */
                        pfn++;
+               else if ((flags & SKIP_OFFLINE) && PageOffline(page) &&
+                        !page_count(page))
+                       /*
+                        * Memory devices allow to offline a page if it is
+                        * marked PG_offline and has a reference count of 0.
+                        */
+                       pfn++;
                else
                        break;
        }
@@ -286,7 +295,7 @@ __test_page_isolated_in_pageblock(unsigned long pfn, 
unsigned long end_pfn,
 
 /* Caller should ensure that requested range is in a single zone */
 int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
-                       bool skip_hwpoisoned_pages)
+                       int isol_flags)
 {
        unsigned long pfn, flags;
        struct page *page;
@@ -308,8 +317,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned 
long end_pfn,
        /* Check all pages are free or marked as ISOLATED */
        zone = page_zone(page);
        spin_lock_irqsave(&zone->lock, flags);
-       pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
-                                               skip_hwpoisoned_pages);
+       pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags);
        spin_unlock_irqrestore(&zone->lock, flags);
 
        trace_test_pages_isolated(start_pfn, end_pfn, pfn);
diff --git a/mm/swap.c b/mm/swap.c
index 38c3fa4308e2..f98987656ecc 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -107,6 +107,15 @@ void __put_page(struct page *page)
                 * not return it to page allocator.
                 */
                return;
+       } else if (PageOffline(page)) {
+               /*
+                * Memory devices allow to offline a page if it is
+                * marked PG_offline and has a reference count of 0. So if
+                * somebody puts a reference of such a page and the
+                * reference count drops to 0, don't return the page to the
+                * buddy.
+                */
+               return;
        }
 
        if (unlikely(PageCompound(page)))
-- 
2.21.0

Reply via email to