The implementation of the current virtio-balloon is not very
efficient, the time spends on different stages of inflating
the balloon to 7GB of a 8GB idle guest:

a. allocating pages (6.5%)
b. sending PFNs to host (68.3%)
c. address translation (6.1%)
d. madvise (19%)

It takes about 4126ms for the inflating process to complete.
Debugging shows that the bottle neck are the stage b and stage d.

If using a bitmap to send the page info instead of the PFNs, we
can reduce the overhead in stage b quite a lot. Furthermore, we
can do the address translation and call madvise() with a bulk of
RAM pages, instead of the current page per page way, the overhead
of stage c and stage d can also be reduced a lot.

This patch is the kernel side implementation which is intended to
speed up the inflating & deflating process by adding a new feature
to the virtio-balloon device. With this new feature, inflating the
balloon to 7GB of a 8GB idle guest only takes 590ms, the
performance improvement is about 85%.

TODO: optimize stage a by allocating/freeing a chunk of pages
instead of a single page at a time.

Signed-off-by: Liang Li <liang.z...@intel.com>
Suggested-by: Michael S. Tsirkin <m...@redhat.com>
Cc: Michael S. Tsirkin <m...@redhat.com>
Cc: Paolo Bonzini <pbonz...@redhat.com>
Cc: Cornelia Huck <cornelia.h...@de.ibm.com>
Cc: Amit Shah <amit.s...@redhat.com>
Cc: Dave Hansen <dave.han...@intel.com>
---
 drivers/virtio/virtio_balloon.c | 395 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 367 insertions(+), 28 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index f59cb4f..c3ddec3 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -42,6 +42,10 @@
 #define OOM_VBALLOON_DEFAULT_PAGES 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
+#define BALLOON_BMAP_SIZE      (8 * PAGE_SIZE)
+#define PFNS_PER_BMAP          (BALLOON_BMAP_SIZE * BITS_PER_BYTE)
+#define BALLOON_BMAP_COUNT     32
+
 static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
 module_param(oom_pages, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
@@ -67,6 +71,18 @@ struct virtio_balloon {
 
        /* Number of balloon pages we've told the Host we're not using. */
        unsigned int num_pages;
+       /* Pointer to the response header. */
+       void *resp_hdr;
+       /* Pointer to the start address of response data. */
+       unsigned long *resp_data;
+       /* Pointer offset of the response data. */
+       unsigned long resp_pos;
+       /* Bitmap and bitmap count used to tell the host the pages */
+       unsigned long *page_bitmap[BALLOON_BMAP_COUNT];
+       /* Number of split page bitmaps */
+       unsigned int nr_page_bmap;
+       /* Used to record the processed pfn range */
+       unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
        /*
         * The pages we've told the Host we're not using are enqueued
         * at vb_dev_info->pages list.
@@ -110,20 +126,228 @@ static void balloon_ack(struct virtqueue *vq)
        wake_up(&vb->acked);
 }
 
-static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
+static inline void init_bmap_pfn_range(struct virtio_balloon *vb)
 {
-       struct scatterlist sg;
+       vb->min_pfn = ULONG_MAX;
+       vb->max_pfn = 0;
+}
+
+static inline void update_bmap_pfn_range(struct virtio_balloon *vb,
+                                struct page *page)
+{
+       unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+       vb->min_pfn = min(balloon_pfn, vb->min_pfn);
+       vb->max_pfn = max(balloon_pfn, vb->max_pfn);
+}
+
+static void extend_page_bitmap(struct virtio_balloon *vb,
+                               unsigned long nr_pfn)
+{
+       int i, bmap_count;
+       unsigned long bmap_len;
+
+       bmap_len = ALIGN(nr_pfn, BITS_PER_LONG) / BITS_PER_BYTE;
+       bmap_len = ALIGN(bmap_len, BALLOON_BMAP_SIZE);
+       bmap_count = min((int)(bmap_len / BALLOON_BMAP_SIZE),
+                                BALLOON_BMAP_COUNT);
+
+       for (i = 1; i < bmap_count; i++) {
+               vb->page_bitmap[i] = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL);
+               if (vb->page_bitmap[i])
+                       vb->nr_page_bmap++;
+               else
+                       break;
+       }
+}
+
+static void free_extended_page_bitmap(struct virtio_balloon *vb)
+{
+       int i, bmap_count = vb->nr_page_bmap;
+
+
+       for (i = 1; i < bmap_count; i++) {
+               kfree(vb->page_bitmap[i]);
+               vb->page_bitmap[i] = NULL;
+               vb->nr_page_bmap--;
+       }
+}
+
+static void kfree_page_bitmap(struct virtio_balloon *vb)
+{
+       int i;
+
+       for (i = 0; i < vb->nr_page_bmap; i++)
+               kfree(vb->page_bitmap[i]);
+}
+
+static void clear_page_bitmap(struct virtio_balloon *vb)
+{
+       int i;
+
+       for (i = 0; i < vb->nr_page_bmap; i++)
+               memset(vb->page_bitmap[i], 0, BALLOON_BMAP_SIZE);
+}
+
+static unsigned long do_set_resp_bitmap(struct virtio_balloon *vb,
+       unsigned long *bitmap,  unsigned long base_pfn,
+       unsigned long pos, int nr_page)
+
+{
+       struct virtio_balloon_bmap_hdr *hdr;
+       unsigned long end, new_pos, new_end, nr_left, proccessed = 0;
+
+       new_pos = pos;
+       new_end = end = pos + nr_page;
+
+       if (pos % BITS_PER_LONG) {
+               unsigned long pos_s;
+
+               pos_s = rounddown(pos, BITS_PER_LONG);
+               hdr = (struct virtio_balloon_bmap_hdr *)(vb->resp_data
+                                                        + vb->resp_pos);
+               hdr->head.start_pfn = base_pfn + pos_s;
+               hdr->head.page_shift = PAGE_SHIFT;
+               hdr->head.bmap_len = sizeof(unsigned long);
+               hdr->bmap[0] = cpu_to_virtio64(vb->vdev,
+                                bitmap[pos_s / BITS_PER_LONG]);
+               vb->resp_pos += 2;
+               if (pos_s + BITS_PER_LONG >= end)
+                       return roundup(end, BITS_PER_LONG) - pos;
+               new_pos = roundup(pos, BITS_PER_LONG);
+       }
+
+       if (end % BITS_PER_LONG) {
+               unsigned long pos_e;
+
+               pos_e = roundup(end, BITS_PER_LONG);
+               hdr = (struct virtio_balloon_bmap_hdr *)(vb->resp_data
+                                                        + vb->resp_pos);
+               hdr->head.start_pfn = base_pfn + pos_e - BITS_PER_LONG;
+               hdr->head.page_shift = PAGE_SHIFT;
+               hdr->head.bmap_len = sizeof(unsigned long);
+               hdr->bmap[0] = bitmap[pos_e / BITS_PER_LONG - 1];
+               vb->resp_pos += 2;
+               if (new_pos + BITS_PER_LONG >= pos_e)
+                       return pos_e - pos;
+               new_end = rounddown(end, BITS_PER_LONG);
+       }
+
+       nr_left = nr_page = new_end - new_pos;
+
+       while (proccessed < nr_page) {
+               int bulk, order;
+
+               order = get_order(nr_left << PAGE_SHIFT);
+               if ((1 << order) > nr_left)
+                       order--;
+               hdr = (struct virtio_balloon_bmap_hdr *)(vb->resp_data
+                                                        + vb->resp_pos);
+               hdr->head.start_pfn = base_pfn + new_pos + proccessed;
+               hdr->head.page_shift = order + PAGE_SHIFT;
+               hdr->head.bmap_len = 0;
+               bulk = 1 << order;
+               nr_left -= bulk;
+               proccessed += bulk;
+               vb->resp_pos++;
+       }
+
+       return roundup(end, BITS_PER_LONG) - pos;
+}
+
+static void send_resp_data(struct virtio_balloon *vb, struct virtqueue *vq,
+                       bool busy_wait)
+{
+       struct scatterlist sg[2];
+       struct virtio_balloon_resp_hdr *hdr = vb->resp_hdr;
        unsigned int len;
 
-       sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+       len = hdr->data_len = vb->resp_pos * sizeof(unsigned long);
+       sg_init_table(sg, 2);
+       sg_set_buf(&sg[0], hdr, sizeof(struct virtio_balloon_resp_hdr));
+       sg_set_buf(&sg[1], vb->resp_data, len);
+
+       if (virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL) == 0) {
+               virtqueue_kick(vq);
+               if (busy_wait)
+                       while (!virtqueue_get_buf(vq, &len)
+                               && !virtqueue_is_broken(vq))
+                               cpu_relax();
+               else
+                       wait_event(vb->acked, virtqueue_get_buf(vq, &len));
+               vb->resp_pos = 0;
+               free_extended_page_bitmap(vb);
+       }
+}
 
-       /* We should always be able to add one buffer to an empty queue. */
-       virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
-       virtqueue_kick(vq);
+static void set_bulk_pages(struct virtio_balloon *vb, struct virtqueue *vq,
+               unsigned long start_pfn, unsigned long *bitmap,
+               unsigned long len, bool busy_wait)
+{
+       unsigned long pos = 0, end = len * BITS_PER_BYTE;
+
+       while (pos < end) {
+               unsigned long one = find_next_bit(bitmap, end, pos);
+
+               if ((vb->resp_pos + 64) * sizeof(unsigned long) >
+                        BALLOON_BMAP_SIZE)
+                       send_resp_data(vb, vq, busy_wait);
+               if (one < end) {
+                       unsigned long pages, zero;
+
+                       zero = find_next_zero_bit(bitmap, end, one + 1);
+                       if (zero >= end)
+                               pages = end - one;
+                       else
+                               pages = zero - one;
+                       if (pages) {
+                               pages = do_set_resp_bitmap(vb, bitmap,
+                                        start_pfn, one, pages);
+                       }
+                       pos = one + pages;
+               } else
+                       pos = one;
+       }
+}
 
-       /* When host has read buffer, this completes via balloon_ack */
-       wait_event(vb->acked, virtqueue_get_buf(vq, &len));
+static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
+{
+       if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
+               int nr_pfn, nr_used_bmap, i;
+               unsigned long start_pfn, bmap_len;
+
+               start_pfn = vb->start_pfn;
+               nr_pfn = vb->end_pfn - start_pfn + 1;
+               nr_pfn = roundup(nr_pfn, BITS_PER_LONG);
+               nr_used_bmap = nr_pfn / PFNS_PER_BMAP;
+               if (nr_pfn % PFNS_PER_BMAP)
+                       nr_used_bmap++;
+               bmap_len = nr_pfn / BITS_PER_BYTE;
+
+               for (i = 0; i < nr_used_bmap; i++) {
+                       unsigned int bmap_size = BALLOON_BMAP_SIZE;
+
+                       if (i + 1 == nr_used_bmap)
+                               bmap_size = bmap_len - BALLOON_BMAP_SIZE * i;
+                       set_bulk_pages(vb, vq, start_pfn + i * PFNS_PER_BMAP,
+                                vb->page_bitmap[i], bmap_size, false);
+               }
+               if (vb->resp_pos > 0)
+                       send_resp_data(vb, vq, false);
+       } else {
+               struct scatterlist sg;
+               unsigned int len;
 
+               sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+
+               /* We should always be able to add one buffer to an
+                * empty queue
+                */
+               virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
+               virtqueue_kick(vq);
+               /* When host has read buffer, this completes via balloon_ack */
+               wait_event(vb->acked, virtqueue_get_buf(vq, &len));
+       }
 }
 
 static void set_page_pfns(struct virtio_balloon *vb,
@@ -138,13 +362,59 @@ static void set_page_pfns(struct virtio_balloon *vb,
                                          page_to_balloon_pfn(page) + i);
 }
 
-static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
+static void set_page_bitmap(struct virtio_balloon *vb,
+                        struct list_head *pages, struct virtqueue *vq)
+{
+       unsigned long pfn, pfn_limit;
+       struct page *page;
+       bool found;
+       int bmap_idx;
+
+       vb->min_pfn = rounddown(vb->min_pfn, BITS_PER_LONG);
+       vb->max_pfn = roundup(vb->max_pfn, BITS_PER_LONG);
+       pfn_limit = PFNS_PER_BMAP * vb->nr_page_bmap;
+
+       if (vb->nr_page_bmap == 1)
+               extend_page_bitmap(vb, vb->max_pfn - vb->min_pfn + 1);
+       for (pfn = vb->min_pfn; pfn < vb->max_pfn; pfn += pfn_limit) {
+               unsigned long end_pfn;
+
+               clear_page_bitmap(vb);
+               vb->start_pfn = pfn;
+               end_pfn = pfn;
+               found = false;
+               list_for_each_entry(page, pages, lru) {
+                       unsigned long pos, balloon_pfn;
+
+                       balloon_pfn = page_to_balloon_pfn(page);
+                       if (balloon_pfn < pfn || balloon_pfn >= pfn + pfn_limit)
+                               continue;
+                       bmap_idx = (balloon_pfn - pfn) / PFNS_PER_BMAP;
+                       pos = (balloon_pfn - pfn) % PFNS_PER_BMAP;
+                       set_bit(pos, vb->page_bitmap[bmap_idx]);
+                       if (balloon_pfn > end_pfn)
+                               end_pfn = balloon_pfn;
+                       found = true;
+               }
+               if (found) {
+                       vb->end_pfn = end_pfn;
+                       tell_host(vb, vq);
+               }
+       }
+}
+
+static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num)
 {
        struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
-       unsigned num_allocated_pages;
+       unsigned int num_allocated_pages;
+       bool use_bmap = virtio_has_feature(vb->vdev,
+                                VIRTIO_BALLOON_F_PAGE_BITMAP);
 
-       /* We can only do one array worth at a time. */
-       num = min(num, ARRAY_SIZE(vb->pfns));
+       if (use_bmap)
+               init_bmap_pfn_range(vb);
+       else
+               /* We can only do one array worth at a time. */
+               num = min(num, ARRAY_SIZE(vb->pfns));
 
        mutex_lock(&vb->balloon_lock);
        for (vb->num_pfns = 0; vb->num_pfns < num;
@@ -159,7 +429,10 @@ static unsigned fill_balloon(struct virtio_balloon *vb, 
size_t num)
                        msleep(200);
                        break;
                }
-               set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+               if (use_bmap)
+                       update_bmap_pfn_range(vb, page);
+               else
+                       set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
                vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
                if (!virtio_has_feature(vb->vdev,
                                        VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
@@ -168,8 +441,13 @@ static unsigned fill_balloon(struct virtio_balloon *vb, 
size_t num)
 
        num_allocated_pages = vb->num_pfns;
        /* Did we get any? */
-       if (vb->num_pfns != 0)
-               tell_host(vb, vb->inflate_vq);
+       if (vb->num_pfns != 0) {
+               if (use_bmap)
+                       set_page_bitmap(vb, &vb_dev_info->pages,
+                                       vb->inflate_vq);
+               else
+                       tell_host(vb, vb->inflate_vq);
+       }
        mutex_unlock(&vb->balloon_lock);
 
        return num_allocated_pages;
@@ -189,15 +467,20 @@ static void release_pages_balloon(struct virtio_balloon 
*vb,
        }
 }
 
-static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
+static unsigned int leak_balloon(struct virtio_balloon *vb, size_t num)
 {
-       unsigned num_freed_pages;
+       unsigned int num_freed_pages;
        struct page *page;
        struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
        LIST_HEAD(pages);
+       bool use_bmap = virtio_has_feature(vb->vdev,
+                        VIRTIO_BALLOON_F_PAGE_BITMAP);
 
-       /* We can only do one array worth at a time. */
-       num = min(num, ARRAY_SIZE(vb->pfns));
+       if (use_bmap)
+               init_bmap_pfn_range(vb);
+       else
+               /* We can only do one array worth at a time. */
+               num = min(num, ARRAY_SIZE(vb->pfns));
 
        mutex_lock(&vb->balloon_lock);
        /* We can't release more pages than taken */
@@ -207,7 +490,10 @@ static unsigned leak_balloon(struct virtio_balloon *vb, 
size_t num)
                page = balloon_page_dequeue(vb_dev_info);
                if (!page)
                        break;
-               set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+               if (use_bmap)
+                       update_bmap_pfn_range(vb, page);
+               else
+                       set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
                list_add(&page->lru, &pages);
                vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
        }
@@ -218,8 +504,12 @@ static unsigned leak_balloon(struct virtio_balloon *vb, 
size_t num)
         * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
         * is true, we *have* to do it in this order
         */
-       if (vb->num_pfns != 0)
-               tell_host(vb, vb->deflate_vq);
+       if (vb->num_pfns != 0) {
+               if (use_bmap)
+                       set_page_bitmap(vb, &pages, vb->deflate_vq);
+               else
+                       tell_host(vb, vb->deflate_vq);
+       }
        release_pages_balloon(vb, &pages);
        mutex_unlock(&vb->balloon_lock);
        return num_freed_pages;
@@ -431,6 +721,20 @@ static int init_vqs(struct virtio_balloon *vb)
 }
 
 #ifdef CONFIG_BALLOON_COMPACTION
+static void tell_host_one_page(struct virtio_balloon *vb,
+       struct virtqueue *vq, struct page *page)
+{
+       struct virtio_balloon_bmap_hdr *bmap_hdr;
+
+       bmap_hdr = (struct virtio_balloon_bmap_hdr *)(vb->resp_data
+                                                        + vb->resp_pos);
+       bmap_hdr->head.start_pfn = page_to_pfn(page);
+       bmap_hdr->head.page_shift = PAGE_SHIFT;
+       bmap_hdr->head.bmap_len = 0;
+       vb->resp_pos++;
+       send_resp_data(vb, vq, false);
+}
+
 /*
  * virtballoon_migratepage - perform the balloon page migration on behalf of
  *                          a compation thread.     (called under page lock)
@@ -455,6 +759,8 @@ static int virtballoon_migratepage(struct balloon_dev_info 
*vb_dev_info,
        struct virtio_balloon *vb = container_of(vb_dev_info,
                        struct virtio_balloon, vb_dev_info);
        unsigned long flags;
+       bool use_bmap = virtio_has_feature(vb->vdev,
+                                VIRTIO_BALLOON_F_PAGE_BITMAP);
 
        /*
         * In order to avoid lock contention while migrating pages concurrently
@@ -475,15 +781,23 @@ static int virtballoon_migratepage(struct 
balloon_dev_info *vb_dev_info,
        vb_dev_info->isolated_pages--;
        __count_vm_event(BALLOON_MIGRATE);
        spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
-       vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
-       set_page_pfns(vb, vb->pfns, newpage);
-       tell_host(vb, vb->inflate_vq);
+       if (use_bmap)
+               tell_host_one_page(vb, vb->inflate_vq, newpage);
+       else {
+               vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+               set_page_pfns(vb, vb->pfns, newpage);
+               tell_host(vb, vb->inflate_vq);
+       }
 
        /* balloon's page migration 2nd step -- deflate "page" */
        balloon_page_delete(page);
-       vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
-       set_page_pfns(vb, vb->pfns, page);
-       tell_host(vb, vb->deflate_vq);
+       if (use_bmap)
+               tell_host_one_page(vb, vb->deflate_vq, page);
+       else {
+               vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+               set_page_pfns(vb, vb->pfns, page);
+               tell_host(vb, vb->deflate_vq);
+       }
 
        mutex_unlock(&vb->balloon_lock);
 
@@ -533,6 +847,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
        spin_lock_init(&vb->stop_update_lock);
        vb->stop_update = false;
        vb->num_pages = 0;
+       vb->resp_hdr = kzalloc(sizeof(struct virtio_balloon_resp_hdr),
+                                GFP_KERNEL);
+       /* Clear the feature bit if memory allocation fails */
+       if (!vb->resp_hdr)
+               __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
+       else {
+               vb->page_bitmap[0] = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL);
+               if (!vb->page_bitmap[0]) {
+                       __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
+                       kfree(vb->resp_hdr);
+               } else {
+                       vb->nr_page_bmap = 1;
+                       vb->resp_data = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL);
+                       if (!vb->resp_data) {
+                               __virtio_clear_bit(vdev,
+                                               VIRTIO_BALLOON_F_PAGE_BITMAP);
+                               kfree(vb->page_bitmap[0]);
+                               kfree(vb->resp_hdr);
+                       }
+               }
+       }
+       vb->resp_pos = 0;
        mutex_init(&vb->balloon_lock);
        init_waitqueue_head(&vb->acked);
        vb->vdev = vdev;
@@ -611,6 +947,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
        remove_common(vb);
        if (vb->vb_dev_info.inode)
                iput(vb->vb_dev_info.inode);
+       kfree_page_bitmap(vb);
+       kfree(vb->resp_hdr);
        kfree(vb);
 }
 
@@ -649,6 +987,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
        VIRTIO_BALLOON_F_MUST_TELL_HOST,
        VIRTIO_BALLOON_F_STATS_VQ,
        VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+       VIRTIO_BALLOON_F_PAGE_BITMAP,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
-- 
1.8.3.1


---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org

Reply via email to