[PATCH kernel v4 4/7] virtio-balloon: speed up inflate/deflate process

2016-11-02 Thread Liang Li
The current virtio-balloon implementation is not very
efficient. The time spent in the different stages of inflating
the balloon to 7GB on an 8GB idle guest is:

a. allocating pages (6.5%)
b. sending PFNs to host (68.3%)
c. address translation (6.1%)
d. madvise (19%)

It takes about 4126ms for the inflating process to complete.
Debugging shows that the bottlenecks are stage b and stage d.

If we use a bitmap to send the page info instead of the PFNs, we
can reduce the overhead in stage b quite a lot. Furthermore, if we
do the address translation and call madvise() on a bulk of RAM
pages, instead of the current page-by-page approach, the overhead
of stage c and stage d can also be reduced a lot.

This patch is the kernel-side implementation, which is intended to
speed up the inflating & deflating process by adding a new feature
to the virtio-balloon device. With this new feature, inflating the
balloon to 7GB on an 8GB idle guest takes only 590ms, a
performance improvement of about 85%.

TODO: optimize stage a by allocating/freeing a chunk of pages
instead of a single page at a time.

Signed-off-by: Liang Li 
Suggested-by: Michael S. Tsirkin 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
Cc: Dave Hansen 
---
 drivers/virtio/virtio_balloon.c | 398 +---
 1 file changed, 369 insertions(+), 29 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 59ffe5a..c6c94b6 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -42,6 +42,10 @@
 #define OOM_VBALLOON_DEFAULT_PAGES 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
+#define BALLOON_BMAP_SIZE  (8 * PAGE_SIZE)
+#define PFNS_PER_BMAP  (BALLOON_BMAP_SIZE * BITS_PER_BYTE)
+#define BALLOON_BMAP_COUNT 32
+
 static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
 module_param(oom_pages, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
@@ -67,6 +71,18 @@ struct virtio_balloon {
 
/* Number of balloon pages we've told the Host we're not using. */
unsigned int num_pages;
+   /* Pointer to the response header. */
+   void *resp_hdr;
+   /* Pointer to the start address of response data. */
+   unsigned long *resp_data;
+   /* Pointer offset of the response data. */
+   unsigned long resp_pos;
+   /* Bitmap and bitmap count used to tell the host the pages */
+   unsigned long *page_bitmap[BALLOON_BMAP_COUNT];
+   /* Number of split page bitmaps */
+   unsigned int nr_page_bmap;
+   /* Used to record the processed pfn range */
+   unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
/*
 * The pages we've told the Host we're not using are enqueued
 * at vb_dev_info->pages list.
@@ -110,20 +126,227 @@ static void balloon_ack(struct virtqueue *vq)
wake_up(&vb->acked);
 }
 
-static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
+static inline void init_bmap_pfn_range(struct virtio_balloon *vb)
 {
-   struct scatterlist sg;
+   vb->min_pfn = ULONG_MAX;
+   vb->max_pfn = 0;
+}
+
+static inline void update_bmap_pfn_range(struct virtio_balloon *vb,
+struct page *page)
+{
+   unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+   vb->min_pfn = min(balloon_pfn, vb->min_pfn);
+   vb->max_pfn = max(balloon_pfn, vb->max_pfn);
+}
+
+static void extend_page_bitmap(struct virtio_balloon *vb)
+{
+   int i, bmap_count;
+   unsigned long bmap_len;
+
+   bmap_len = ALIGN(get_max_pfn(), BITS_PER_LONG) / BITS_PER_BYTE;
+   bmap_len = ALIGN(bmap_len, BALLOON_BMAP_SIZE);
+   bmap_count = min((int)(bmap_len / BALLOON_BMAP_SIZE),
+BALLOON_BMAP_COUNT);
+
+   for (i = 1; i < bmap_count; i++) {
+   vb->page_bitmap[i] = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL);
+   if (vb->page_bitmap[i])
+   vb->nr_page_bmap++;
+   else
+   break;
+   }
+}
+
+static void free_extended_page_bitmap(struct virtio_balloon *vb)
+{
+   int i, bmap_count = vb->nr_page_bmap;
+
+
+   for (i = 1; i < bmap_count; i++) {
+   kfree(vb->page_bitmap[i]);
+   vb->page_bitmap[i] = NULL;
+   vb->nr_page_bmap--;
+   }
+}
+
+static void kfree_page_bitmap(struct virtio_balloon *vb)
+{
+   int i;
+
+   for (i = 0; i < vb->nr_page_bmap; i++)
+   kfree(vb->page_bitmap[i]);
+}
+
+static void clear_page_bitmap(struct virtio_balloon *vb)
+{
+   int i;
+
+   for (i = 0; i < vb->nr_page_bmap; i++)
+   memset(vb->page_bitmap[i], 0, BALLOON_BMAP_SIZE);
+}
+
+static unsigned long do_set_resp_bitmap(struct virtio_balloon *vb,
+   

[PATCH kernel v4 4/7] virtio-balloon: speed up inflate/deflate process

2016-11-02 Thread Liang Li
The current virtio-balloon implementation is not very
efficient. The time spent in the different stages of inflating
the balloon to 7GB on an 8GB idle guest is:

a. allocating pages (6.5%)
b. sending PFNs to host (68.3%)
c. address translation (6.1%)
d. madvise (19%)

It takes about 4126ms for the inflating process to complete.
Debugging shows that the bottlenecks are stage b and stage d.

If we use a bitmap to send the page info instead of the PFNs, we
can reduce the overhead in stage b quite a lot. Furthermore, if we
do the address translation and call madvise() on a bulk of RAM
pages, instead of the current page-by-page approach, the overhead
of stage c and stage d can also be reduced a lot.

This patch is the kernel-side implementation, which is intended to
speed up the inflating & deflating process by adding a new feature
to the virtio-balloon device. With this new feature, inflating the
balloon to 7GB on an 8GB idle guest takes only 590ms, a
performance improvement of about 85%.

TODO: optimize stage a by allocating/freeing a chunk of pages
instead of a single page at a time.

Signed-off-by: Liang Li 
Suggested-by: Michael S. Tsirkin 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
Cc: Dave Hansen 
---
 drivers/virtio/virtio_balloon.c | 398 +---
 1 file changed, 369 insertions(+), 29 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 59ffe5a..c6c94b6 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -42,6 +42,10 @@
 #define OOM_VBALLOON_DEFAULT_PAGES 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
+#define BALLOON_BMAP_SIZE  (8 * PAGE_SIZE)
+#define PFNS_PER_BMAP  (BALLOON_BMAP_SIZE * BITS_PER_BYTE)
+#define BALLOON_BMAP_COUNT 32
+
 static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
 module_param(oom_pages, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
@@ -67,6 +71,18 @@ struct virtio_balloon {
 
/* Number of balloon pages we've told the Host we're not using. */
unsigned int num_pages;
+   /* Pointer to the response header. */
+   void *resp_hdr;
+   /* Pointer to the start address of response data. */
+   unsigned long *resp_data;
+   /* Pointer offset of the response data. */
+   unsigned long resp_pos;
+   /* Bitmap and bitmap count used to tell the host the pages */
+   unsigned long *page_bitmap[BALLOON_BMAP_COUNT];
+   /* Number of split page bitmaps */
+   unsigned int nr_page_bmap;
+   /* Used to record the processed pfn range */
+   unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
/*
 * The pages we've told the Host we're not using are enqueued
 * at vb_dev_info->pages list.
@@ -110,20 +126,227 @@ static void balloon_ack(struct virtqueue *vq)
wake_up(&vb->acked);
 }
 
-static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
+static inline void init_bmap_pfn_range(struct virtio_balloon *vb)
 {
-   struct scatterlist sg;
+   vb->min_pfn = ULONG_MAX;
+   vb->max_pfn = 0;
+}
+
+static inline void update_bmap_pfn_range(struct virtio_balloon *vb,
+struct page *page)
+{
+   unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+   vb->min_pfn = min(balloon_pfn, vb->min_pfn);
+   vb->max_pfn = max(balloon_pfn, vb->max_pfn);
+}
+
+static void extend_page_bitmap(struct virtio_balloon *vb)
+{
+   int i, bmap_count;
+   unsigned long bmap_len;
+
+   bmap_len = ALIGN(get_max_pfn(), BITS_PER_LONG) / BITS_PER_BYTE;
+   bmap_len = ALIGN(bmap_len, BALLOON_BMAP_SIZE);
+   bmap_count = min((int)(bmap_len / BALLOON_BMAP_SIZE),
+BALLOON_BMAP_COUNT);
+
+   for (i = 1; i < bmap_count; i++) {
+   vb->page_bitmap[i] = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL);
+   if (vb->page_bitmap[i])
+   vb->nr_page_bmap++;
+   else
+   break;
+   }
+}
+
+static void free_extended_page_bitmap(struct virtio_balloon *vb)
+{
+   int i, bmap_count = vb->nr_page_bmap;
+
+
+   for (i = 1; i < bmap_count; i++) {
+   kfree(vb->page_bitmap[i]);
+   vb->page_bitmap[i] = NULL;
+   vb->nr_page_bmap--;
+   }
+}
+
+static void kfree_page_bitmap(struct virtio_balloon *vb)
+{
+   int i;
+
+   for (i = 0; i < vb->nr_page_bmap; i++)
+   kfree(vb->page_bitmap[i]);
+}
+
+static void clear_page_bitmap(struct virtio_balloon *vb)
+{
+   int i;
+
+   for (i = 0; i < vb->nr_page_bmap; i++)
+   memset(vb->page_bitmap[i], 0, BALLOON_BMAP_SIZE);
+}
+
+static unsigned long do_set_resp_bitmap(struct virtio_balloon *vb,
+   unsigned long *bitmap,  unsigned long base_pfn,
+   unsigned long pos, int nr_page)
+
+{
+   struct virtio_balloon_bmap_hdr *hdr;
+