Dropping the reference count of PageOffline() pages to zero allows the
offlining code to skip them. However, we also have to convert PG_reserved
to another flag - let's use PG_dirty - because has_unmovable_pages()
detects PG_reserved pages as unmovable right away.
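
A rough sketch of why both changes are needed (illustration only - not
the actual mm/ isolation code, and the helper name is made up):

  /* Can this page prevent isolation/offlining of its range? */
  static bool page_blocks_offlining(struct page *page)
  {
          /* PG_reserved pages are detected as unmovable right away. */
          if (PageReserved(page))
                  return true;
          /* Fake-offline pages: PageOffline() with a refcount of 0. */
          if (PageOffline(page) && !page_count(page))
                  return false;
          /* ... further movability checks elided ... */
          return true;
  }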

We need the flag to tell whether we are onlining pages for the first
time or whether they were allocated via alloc_contig_range().

Properly take care of the offlining code also modifying the stats, and
add special handling for the case where the driver gets unloaded.
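
For illustration, with made-up sizes (128 MiB memory block = 32768
pages, 4 MiB subblock = 1024 pages, 2 subblocks unplugged) and T being
totalram with the block fully plugged and online:

  online, 2 subblocks unplugged:        T - 2 * 1024  (unplug already subtracted)
  after the GOING_OFFLINE fixup:        T
  after offlining subtracts the block:  T - 32768

so the block ends up being subtracted exactly once. MEM_CANCEL_OFFLINE
reverts the fixup again.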

Cc: "Michael S. Tsirkin" <[email protected]>
Cc: Jason Wang <[email protected]>
Cc: Oscar Salvador <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Igor Mammedov <[email protected]>
Cc: Dave Young <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Pavel Tatashin <[email protected]>
Cc: Stefan Hajnoczi <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Signed-off-by: David Hildenbrand <[email protected]>
---
 drivers/virtio/virtio_mem.c | 102 ++++++++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 10 deletions(-)

diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 91052a37d10d..9cb31459b211 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -561,6 +561,30 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id,
                virtio_mem_retry(vm);
 }
 
+/*
+ * When we unplug subblocks, we already modify the stats (e.g., subtract
+ * them from totalram_pages). The offlining code will modify the stats,
+ * too. So properly fix up the stats when GOING_OFFLINE and revert that
+ * when CANCEL_OFFLINE.
+ */
+static void virtio_mem_mb_going_offline_fixup_stats(struct virtio_mem *vm,
+                                                   unsigned long mb_id,
+                                                   bool cancel)
+{
+       const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
+       int sb_id;
+
+       for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
+               if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+                       continue;
+
+               if (cancel)
+                       totalram_pages_add(-nr_pages);
+               else
+                       totalram_pages_add(nr_pages);
+       }
+}
+
 /*
  * This callback will either be called synchonously from add_memory() or
  * asynchronously (e.g., triggered via user space). We have to be careful
@@ -608,6 +632,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
                        mutex_lock(&vm->hotplug_mutex);
                        vm->hotplug_active = true;
                }
+               virtio_mem_mb_going_offline_fixup_stats(vm, mb_id, false);
                break;
        case MEM_GOING_ONLINE:
                spin_lock_irq(&vm->removal_lock);
@@ -633,6 +658,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
                mutex_unlock(&vm->hotplug_mutex);
                break;
        case MEM_CANCEL_OFFLINE:
+               virtio_mem_mb_going_offline_fixup_stats(vm, mb_id, true);
+               /* fall through */
        case MEM_CANCEL_ONLINE:
                /* We might not get a MEM_GOING* if somebody else canceled */
                if (vm->hotplug_active) {
@@ -648,23 +675,55 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
 }
 
 /*
- * Set a range of pages PG_offline.
+ * Convert PG_reserved to PG_dirty, so the isolation code does not
+ * immediately consider these pages unmovable.
+ */
+static void virtio_mem_reserved_to_dirty(unsigned long pfn,
+                                        unsigned int nr_pages)
+{
+       for (; nr_pages--; pfn++) {
+               SetPageDirty(pfn_to_page(pfn));
+               ClearPageReserved(pfn_to_page(pfn));
+       }
+}
+
+/*
+ * Convert PG_dirty to PG_reserved. Needed so generic_online_page()
+ * works correctly.
+ */
+static void virtio_mem_dirty_to_reserved(unsigned long pfn,
+                                        unsigned int nr_pages)
+{
+       for (; nr_pages--; pfn++) {
+               SetPageReserved(pfn_to_page(pfn));
+               ClearPageDirty(pfn_to_page(pfn));
+       }
+}
+
+/*
+ * Set a range of pages PG_offline and drop their reference. The
+ * reference count of 0 and the flag allow the isolation code to isolate
+ * this range and the offlining code to offline it.
  */
 static void virtio_mem_set_fake_offline(unsigned long pfn,
                                        unsigned int nr_pages)
 {
-       for (; nr_pages--; pfn++)
+       for (; nr_pages--; pfn++) {
                __SetPageOffline(pfn_to_page(pfn));
+               page_ref_dec(pfn_to_page(pfn));
+       }
 }
 
 /*
- * Clear PG_offline from a range of pages.
+ * Get a reference and clear PG_offline from a range of pages.
  */
 static void virtio_mem_clear_fake_offline(unsigned long pfn,
                                          unsigned int nr_pages)
 {
-       for (; nr_pages--; pfn++)
+       for (; nr_pages--; pfn++) {
+               page_ref_inc(pfn_to_page(pfn));
                __ClearPageOffline(pfn_to_page(pfn));
+       }
 }
 
 /*
@@ -679,7 +738,7 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
        /*
         * We are always called with subblock granularity, which is at least
         * aligned to MAX_ORDER - 1. All pages in a subblock are either
-        * reserved or not.
+        * PG_dirty (converted PG_reserved) or not.
         */
        BUG_ON(!IS_ALIGNED(pfn, 1 << order));
        BUG_ON(!IS_ALIGNED(nr_pages, 1 << order));
@@ -690,13 +749,14 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
                struct page *page = pfn_to_page(pfn + i);
 
                /*
-                * If the page is reserved, it was kept fake-offline when
+                * If the page is PG_dirty, it was kept fake-offline when
                 * onlining the memory block. Otherwise, it was allocated
                 * using alloc_contig_range().
                 */
-               if (PageReserved(page))
+               if (PageDirty(page)) {
+                       virtio_mem_dirty_to_reserved(pfn + i, 1 << order);
                        generic_online_page(page, order);
-               else {
+               } else {
                        free_contig_range(pfn + i, 1 << order);
                        totalram_pages_add(1 << order);
                }
@@ -728,8 +788,10 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
                 */
                if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
                        generic_online_page(page, order);
-               else
+               else {
                        virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order);
+                       virtio_mem_reserved_to_dirty(PFN_DOWN(addr), 1 << order);
+               }
                rcu_read_unlock();
                return;
        }
@@ -1674,7 +1736,8 @@ static int virtio_mem_probe(struct virtio_device *vdev)
 static void virtio_mem_remove(struct virtio_device *vdev)
 {
        struct virtio_mem *vm = vdev->priv;
-       unsigned long mb_id;
+       const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
+       unsigned long pfn, mb_id, sb_id, i;
        int rc;
 
        /*
@@ -1701,6 +1764,25 @@ static void virtio_mem_remove(struct virtio_device *vdev)
                BUG_ON(rc);
                mutex_lock(&vm->hotplug_mutex);
        }
+       /*
+        * After we unregistered our callbacks, user space can offline and
+        * re-online partially plugged online blocks. Make sure they can't
+        * get offlined by taking a reference. Also, restore PG_reserved.
+        */
+       virtio_mem_for_each_mb_state(vm, mb_id,
+                                    VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
+               for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
+                       if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+                               continue;
+                       pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
+                             sb_id * vm->subblock_size);
+
+                       if (PageDirty(pfn_to_page(pfn)))
+                               virtio_mem_dirty_to_reserved(pfn, nr_pages);
+                       for (i = 0; i < nr_pages; i++)
+                               page_ref_inc(pfn_to_page(pfn + i));
+               }
+       }
        mutex_unlock(&vm->hotplug_mutex);
 
        /* unregister callbacks */
-- 
2.21.0
