RE: [PATCH 2/6] virtio-balloon: speed up inflate/deflate process

2016-06-24 Thread Li, Liang Z
Hi Michael,

Thanks for your comments!

> 
> 2<< 30  is 2G but that is not a useful comment.
> pls explain what is the reason for this selection.
> 

Will change in the next version.

> > +struct balloon_bmap_hdr {
> > +   __virtio32 id;
> > +   __virtio32 page_shift;
> > +   __virtio64 start_pfn;
> > +   __virtio64 bmap_len;
> > +};
> > +
> 
> Put this in an uapi header please.

Will change in the next version.

> > +static inline void init_pfn_range(struct virtio_balloon *vb) {
> > +   vb->min_pfn = (1UL << 48);
> 
> Where does this value come from? Do you want ULONG_MAX?
> This does not fit in long on 32 bit systems.

I just want to make it big enough, ULONG_MAX is better. Will change it.

> >  static void tell_host(struct virtio_balloon *vb, struct virtqueue
> > *vq)  {
> > -   struct scatterlist sg;
> > unsigned int len;
> >
> > -   sg_init_one(, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > +   if (virtio_has_feature(vb->vdev,
> VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> > +   struct balloon_bmap_hdr hdr;
> 
> why not init fields here?
> 
> > +   unsigned long bmap_len;
> 
> and here

All the fields and the bmap_len will be updated later, so init is unnecessary?
 
> > +   struct scatterlist sg[2];
> > +
> > +   hdr.id = cpu_to_virtio32(vb->vdev, 0);
> > +   hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
> > +   hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
> > +   bmap_len = min(vb->bmap_len,
> > +   (vb->end_pfn - vb->start_pfn) /
> BITS_PER_BYTE);
> > +   hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
> > +   sg_init_table(sg, 2);
> > +   sg_set_buf([0], , sizeof(hdr));
> > +   sg_set_buf([1], vb->page_bitmap, bmap_len);
> > +   virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL);
> 
> might fail if queue size < 2. validate queue size and clear
> VIRTIO_BALLOON_F_PAGE_BITMAP?
> 
not considered yet.

> Alternatively, and I think preferably,
> use first struct balloon_bmap_hdr bytes in the buffer to pass the header to
> host.

How about the bitmap, in another sending?

> > +   struct page *page, *next;
> > +   bool find;
> 
> find -> found

Will change.

> > +   vb->max_pfn = roundup(vb->max_pfn, BITS_PER_LONG);
> > +   for (pfn = vb->min_pfn; pfn < vb->max_pfn;
> > +   pfn += VIRTIO_BALLOON_PFNS_LIMIT) {
> > +   vb->start_pfn = pfn;
> > +   vb->end_pfn = pfn;
> > +   memset(vb->page_bitmap, 0, vb->bmap_len);
> > +   find = false;
> > +   list_for_each_entry_safe(page, next, pages, lru) {
> 
> Why _safe?

No safe is OK. Will change.

> > +   unsigned long balloon_pfn =
> page_to_balloon_pfn(page);
> > +
> > +   if (balloon_pfn < pfn ||
> > +balloon_pfn >= pfn +
> VIRTIO_BALLOON_PFNS_LIMIT)
> > +   continue;
> > +   set_bit(balloon_pfn - pfn, vb->page_bitmap);
> > +   if (balloon_pfn > vb->end_pfn)
> > +   vb->end_pfn = balloon_pfn;
> > +   find = true;
> 
> maybe remove page from list? this way we won't go over same entry
> multiple times ...

No, we can't remove the page from list. The list saves all the pages filled in 
the balloon,
When delating, we fetch the pages from the list to return them to guest.  
If removed, we can't find them.

> > unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> >
> > num_allocated_pages = vb->num_pfns;
> > /* Did we get any? */
> > -   if (vb->num_pfns != 0)
> > -   tell_host(vb, vb->inflate_vq);
> > +   if (vb->num_pfns != 0) {
> > +   if (use_bmap)
> > +   set_page_bitmap(vb, _dev_info->pages,
> > +vb->inflate_vq);
> 
> don't we need pages_lock if we access vb_dev_info->pages?

It is protected by the vb->balloon_lock. not enough?

> > +   vb->page_bitmap = kzalloc(vb->bmap_len, GFP_KERNEL);
> > +   if (!vb->page_bitmap) {
> > +   err = -ENOMEM;
> > +   goto out;
> > +   }
> 
> How about we clear the bitmap feature on this failure?

That' better.  Will change.

Thanks again!
Liang


RE: [PATCH 2/6] virtio-balloon: speed up inflate/deflate process

2016-06-24 Thread Li, Liang Z
Hi Michael,

Thanks for your comments!

> 
> 2<< 30  is 2G but that is not a useful comment.
> pls explain what is the reason for this selection.
> 

Will change in the next version.

> > +struct balloon_bmap_hdr {
> > +   __virtio32 id;
> > +   __virtio32 page_shift;
> > +   __virtio64 start_pfn;
> > +   __virtio64 bmap_len;
> > +};
> > +
> 
> Put this in an uapi header please.

Will change in the next version.

> > +static inline void init_pfn_range(struct virtio_balloon *vb) {
> > +   vb->min_pfn = (1UL << 48);
> 
> Where does this value come from? Do you want ULONG_MAX?
> This does not fit in long on 32 bit systems.

I just want to make it big enough, ULONG_MAX is better. Will change it.

> >  static void tell_host(struct virtio_balloon *vb, struct virtqueue
> > *vq)  {
> > -   struct scatterlist sg;
> > unsigned int len;
> >
> > -   sg_init_one(, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > +   if (virtio_has_feature(vb->vdev,
> VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> > +   struct balloon_bmap_hdr hdr;
> 
> why not init fields here?
> 
> > +   unsigned long bmap_len;
> 
> and here

All the fields and the bmap_len will be updated later, so init is unnecessary?
 
> > +   struct scatterlist sg[2];
> > +
> > +   hdr.id = cpu_to_virtio32(vb->vdev, 0);
> > +   hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
> > +   hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
> > +   bmap_len = min(vb->bmap_len,
> > +   (vb->end_pfn - vb->start_pfn) /
> BITS_PER_BYTE);
> > +   hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
> > +   sg_init_table(sg, 2);
> > +   sg_set_buf([0], , sizeof(hdr));
> > +   sg_set_buf([1], vb->page_bitmap, bmap_len);
> > +   virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL);
> 
> might fail if queue size < 2. validate queue size and clear
> VIRTIO_BALLOON_F_PAGE_BITMAP?
> 
not considered yet.

> Alternatively, and I think preferably,
> use first struct balloon_bmap_hdr bytes in the buffer to pass the header to
> host.

How about the bitmap, in another sending?

> > +   struct page *page, *next;
> > +   bool find;
> 
> find -> found

Will change.

> > +   vb->max_pfn = roundup(vb->max_pfn, BITS_PER_LONG);
> > +   for (pfn = vb->min_pfn; pfn < vb->max_pfn;
> > +   pfn += VIRTIO_BALLOON_PFNS_LIMIT) {
> > +   vb->start_pfn = pfn;
> > +   vb->end_pfn = pfn;
> > +   memset(vb->page_bitmap, 0, vb->bmap_len);
> > +   find = false;
> > +   list_for_each_entry_safe(page, next, pages, lru) {
> 
> Why _safe?

No safe is OK. Will change.

> > +   unsigned long balloon_pfn =
> page_to_balloon_pfn(page);
> > +
> > +   if (balloon_pfn < pfn ||
> > +balloon_pfn >= pfn +
> VIRTIO_BALLOON_PFNS_LIMIT)
> > +   continue;
> > +   set_bit(balloon_pfn - pfn, vb->page_bitmap);
> > +   if (balloon_pfn > vb->end_pfn)
> > +   vb->end_pfn = balloon_pfn;
> > +   find = true;
> 
> maybe remove page from list? this way we won't go over same entry
> multiple times ...

No, we can't remove the page from list. The list saves all the pages filled in 
the balloon,
When delating, we fetch the pages from the list to return them to guest.  
If removed, we can't find them.

> > unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> >
> > num_allocated_pages = vb->num_pfns;
> > /* Did we get any? */
> > -   if (vb->num_pfns != 0)
> > -   tell_host(vb, vb->inflate_vq);
> > +   if (vb->num_pfns != 0) {
> > +   if (use_bmap)
> > +   set_page_bitmap(vb, _dev_info->pages,
> > +vb->inflate_vq);
> 
> don't we need pages_lock if we access vb_dev_info->pages?

It is protected by the vb->balloon_lock. not enough?

> > +   vb->page_bitmap = kzalloc(vb->bmap_len, GFP_KERNEL);
> > +   if (!vb->page_bitmap) {
> > +   err = -ENOMEM;
> > +   goto out;
> > +   }
> 
> How about we clear the bitmap feature on this failure?

That' better.  Will change.

Thanks again!
Liang


Re: [PATCH 2/6] virtio-balloon: speed up inflate/deflate process

2016-06-23 Thread Michael S. Tsirkin
On Mon, Jun 13, 2016 at 05:47:09PM +0800, Liang Li wrote:
> The implementation of the current virtio-balloon is not very efficient,
> Bellow is test result of time spends on inflating the balloon to 3GB of
> a 4GB idle guest:
> 
> a. allocating pages (6.5%, 103ms)
> b. sending PFNs to host (68.3%, 787ms)
> c. address translation (6.1%, 96ms)
> d. madvise (19%, 300ms)
> 
> It takes about 1577ms for the whole inflating process to complete. The
> test shows that the bottle neck is the stage b and stage d.
> 
> If using a bitmap to send the page info instead of the PFNs, we can
> reduce the overhead in stage b quite a lot. Furthermore, it's possible
> to do the address translation and the madvise with a bulk of pages,
> instead of the current page per page way, so the overhead of stage c
> and stage d can also be reduced a lot.
> 
> This patch is the kernel side implementation which is intended to speed
> up the inflating & deflating process by adding a new feature to the
> virtio-balloon device. And now, inflating the balloon to 3GB of a 4GB
> idle guest only takes 200ms, it's about 8 times as fast as before.
> 
> TODO: optimize stage a by allocating/freeing a chunk of pages instead
> of a single page at a time.
> 
> Signed-off-by: Liang Li 
> Suggested-by: Michael S. Tsirkin 
> Cc: Michael S. Tsirkin 
> Cc: Paolo Bonzini 
> Cc: Cornelia Huck 
> Cc: Amit Shah 

Causes kbuild warnings

> ---
>  drivers/virtio/virtio_balloon.c | 164 
> +++-
>  include/uapi/linux/virtio_balloon.h |   1 +
>  2 files changed, 144 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 8d649a2..1fa601b 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -40,11 +40,19 @@
>  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
>  #define OOM_VBALLOON_DEFAULT_PAGES 256
>  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> +#define VIRTIO_BALLOON_PFNS_LIMIT ((2 * (1ULL << 30)) >> PAGE_SHIFT) /* 2GB 
> */

2<< 30  is 2G but that is not a useful comment.
pls explain what is the reason for this selection.

>  
>  static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
>  module_param(oom_pages, int, S_IRUSR | S_IWUSR);
>  MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
>  
> +struct balloon_bmap_hdr {
> + __virtio32 id;
> + __virtio32 page_shift;
> + __virtio64 start_pfn;
> + __virtio64 bmap_len;
> +};
> +

Put this in an uapi header please.

>  struct virtio_balloon {
>   struct virtio_device *vdev;
>   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> @@ -62,6 +70,11 @@ struct virtio_balloon {
>  
>   /* Number of balloon pages we've told the Host we're not using. */
>   unsigned int num_pages;
> + /* Bitmap and length used to tell the host the pages */
> + unsigned long *page_bitmap;
> + unsigned long bmap_len;
> + /* Used to record the processed pfn range */
> + unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
>   /*
>* The pages we've told the Host we're not using are enqueued
>* at vb_dev_info->pages list.
> @@ -105,15 +118,51 @@ static void balloon_ack(struct virtqueue *vq)
>   wake_up(>acked);
>  }
>  
> +static inline void init_pfn_range(struct virtio_balloon *vb)
> +{
> + vb->min_pfn = (1UL << 48);

Where does this value come from? Do you want ULONG_MAX?
This does not fit in long on 32 bit systems.


> + vb->max_pfn = 0;
> +}
> +
> +static inline void update_pfn_range(struct virtio_balloon *vb,
> +  struct page *page)
> +{
> + unsigned long balloon_pfn = page_to_balloon_pfn(page);
> +
> + if (balloon_pfn < vb->min_pfn)
> + vb->min_pfn = balloon_pfn;
> + if (balloon_pfn > vb->max_pfn)
> + vb->max_pfn = balloon_pfn;
> +}
> +
>  static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
>  {
> - struct scatterlist sg;
>   unsigned int len;
>  
> - sg_init_one(, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> + struct balloon_bmap_hdr hdr;

why not init fields here?

> + unsigned long bmap_len;

and here

> + struct scatterlist sg[2];
> +
> + hdr.id = cpu_to_virtio32(vb->vdev, 0);
> + hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
> + hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
> + bmap_len = min(vb->bmap_len,
> + (vb->end_pfn - vb->start_pfn) / BITS_PER_BYTE);
> + hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
> + sg_init_table(sg, 2);
> + sg_set_buf([0], , sizeof(hdr));
> + sg_set_buf([1], vb->page_bitmap, bmap_len);
> + 

Re: [PATCH 2/6] virtio-balloon: speed up inflate/deflate process

2016-06-23 Thread Michael S. Tsirkin
On Mon, Jun 13, 2016 at 05:47:09PM +0800, Liang Li wrote:
> The implementation of the current virtio-balloon is not very efficient,
> Bellow is test result of time spends on inflating the balloon to 3GB of
> a 4GB idle guest:
> 
> a. allocating pages (6.5%, 103ms)
> b. sending PFNs to host (68.3%, 787ms)
> c. address translation (6.1%, 96ms)
> d. madvise (19%, 300ms)
> 
> It takes about 1577ms for the whole inflating process to complete. The
> test shows that the bottle neck is the stage b and stage d.
> 
> If using a bitmap to send the page info instead of the PFNs, we can
> reduce the overhead in stage b quite a lot. Furthermore, it's possible
> to do the address translation and the madvise with a bulk of pages,
> instead of the current page per page way, so the overhead of stage c
> and stage d can also be reduced a lot.
> 
> This patch is the kernel side implementation which is intended to speed
> up the inflating & deflating process by adding a new feature to the
> virtio-balloon device. And now, inflating the balloon to 3GB of a 4GB
> idle guest only takes 200ms, it's about 8 times as fast as before.
> 
> TODO: optimize stage a by allocating/freeing a chunk of pages instead
> of a single page at a time.
> 
> Signed-off-by: Liang Li 
> Suggested-by: Michael S. Tsirkin 
> Cc: Michael S. Tsirkin 
> Cc: Paolo Bonzini 
> Cc: Cornelia Huck 
> Cc: Amit Shah 

Causes kbuild warnings

> ---
>  drivers/virtio/virtio_balloon.c | 164 
> +++-
>  include/uapi/linux/virtio_balloon.h |   1 +
>  2 files changed, 144 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 8d649a2..1fa601b 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -40,11 +40,19 @@
>  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
>  #define OOM_VBALLOON_DEFAULT_PAGES 256
>  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> +#define VIRTIO_BALLOON_PFNS_LIMIT ((2 * (1ULL << 30)) >> PAGE_SHIFT) /* 2GB 
> */

2<< 30  is 2G but that is not a useful comment.
pls explain what is the reason for this selection.

>  
>  static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
>  module_param(oom_pages, int, S_IRUSR | S_IWUSR);
>  MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
>  
> +struct balloon_bmap_hdr {
> + __virtio32 id;
> + __virtio32 page_shift;
> + __virtio64 start_pfn;
> + __virtio64 bmap_len;
> +};
> +

Put this in an uapi header please.

>  struct virtio_balloon {
>   struct virtio_device *vdev;
>   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> @@ -62,6 +70,11 @@ struct virtio_balloon {
>  
>   /* Number of balloon pages we've told the Host we're not using. */
>   unsigned int num_pages;
> + /* Bitmap and length used to tell the host the pages */
> + unsigned long *page_bitmap;
> + unsigned long bmap_len;
> + /* Used to record the processed pfn range */
> + unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
>   /*
>* The pages we've told the Host we're not using are enqueued
>* at vb_dev_info->pages list.
> @@ -105,15 +118,51 @@ static void balloon_ack(struct virtqueue *vq)
>   wake_up(>acked);
>  }
>  
> +static inline void init_pfn_range(struct virtio_balloon *vb)
> +{
> + vb->min_pfn = (1UL << 48);

Where does this value come from? Do you want ULONG_MAX?
This does not fit in long on 32 bit systems.


> + vb->max_pfn = 0;
> +}
> +
> +static inline void update_pfn_range(struct virtio_balloon *vb,
> +  struct page *page)
> +{
> + unsigned long balloon_pfn = page_to_balloon_pfn(page);
> +
> + if (balloon_pfn < vb->min_pfn)
> + vb->min_pfn = balloon_pfn;
> + if (balloon_pfn > vb->max_pfn)
> + vb->max_pfn = balloon_pfn;
> +}
> +
>  static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
>  {
> - struct scatterlist sg;
>   unsigned int len;
>  
> - sg_init_one(, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> + struct balloon_bmap_hdr hdr;

why not init fields here?

> + unsigned long bmap_len;

and here

> + struct scatterlist sg[2];
> +
> + hdr.id = cpu_to_virtio32(vb->vdev, 0);
> + hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
> + hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
> + bmap_len = min(vb->bmap_len,
> + (vb->end_pfn - vb->start_pfn) / BITS_PER_BYTE);
> + hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
> + sg_init_table(sg, 2);
> + sg_set_buf([0], , sizeof(hdr));
> + sg_set_buf([1], vb->page_bitmap, bmap_len);
> + virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL);

might fail if queue size < 2. validate queue size and clear
VIRTIO_BALLOON_F_PAGE_BITMAP?


Re: [PATCH 2/6] virtio-balloon: speed up inflate/deflate process

2016-06-13 Thread kbuild test robot
Hi,

[auto build test WARNING on v4.7-rc3]
[cannot apply to next-20160609]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Liang-Li/Fast-balloon-fast-live-migration/20160613-175812
config: m68k-allyesconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 4.9.0
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=m68k 

All warnings (new ones prefixed by >>):

   drivers/virtio/virtio_balloon.c: In function 'init_pfn_range':
>> drivers/virtio/virtio_balloon.c:123:2: warning: left shift count >= width of 
>> type
 vb->min_pfn = (1UL << 48);
 ^

vim +123 drivers/virtio/virtio_balloon.c

   107  unsigned long pfn = page_to_pfn(page);
   108  
   109  BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
   110  /* Convert pfn from Linux page size to balloon page size. */
   111  return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
   112  }
   113  
   114  static void balloon_ack(struct virtqueue *vq)
   115  {
   116  struct virtio_balloon *vb = vq->vdev->priv;
   117  
   118  wake_up(>acked);
   119  }
   120  
   121  static inline void init_pfn_range(struct virtio_balloon *vb)
   122  {
 > 123  vb->min_pfn = (1UL << 48);
   124  vb->max_pfn = 0;
   125  }
   126  
   127  static inline void update_pfn_range(struct virtio_balloon *vb,
   128   struct page *page)
   129  {
   130  unsigned long balloon_pfn = page_to_balloon_pfn(page);
   131  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH 2/6] virtio-balloon: speed up inflate/deflate process

2016-06-13 Thread kbuild test robot
Hi,

[auto build test WARNING on v4.7-rc3]
[cannot apply to next-20160609]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Liang-Li/Fast-balloon-fast-live-migration/20160613-175812
config: m68k-allyesconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 4.9.0
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=m68k 

All warnings (new ones prefixed by >>):

   drivers/virtio/virtio_balloon.c: In function 'init_pfn_range':
>> drivers/virtio/virtio_balloon.c:123:2: warning: left shift count >= width of 
>> type
 vb->min_pfn = (1UL << 48);
 ^

vim +123 drivers/virtio/virtio_balloon.c

   107  unsigned long pfn = page_to_pfn(page);
   108  
   109  BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
   110  /* Convert pfn from Linux page size to balloon page size. */
   111  return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
   112  }
   113  
   114  static void balloon_ack(struct virtqueue *vq)
   115  {
   116  struct virtio_balloon *vb = vq->vdev->priv;
   117  
   118  wake_up(>acked);
   119  }
   120  
   121  static inline void init_pfn_range(struct virtio_balloon *vb)
   122  {
 > 123  vb->min_pfn = (1UL << 48);
   124  vb->max_pfn = 0;
   125  }
   126  
   127  static inline void update_pfn_range(struct virtio_balloon *vb,
   128   struct page *page)
   129  {
   130  unsigned long balloon_pfn = page_to_balloon_pfn(page);
   131  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


[PATCH 2/6] virtio-balloon: speed up inflate/deflate process

2016-06-13 Thread Liang Li
The implementation of the current virtio-balloon is not very efficient,
Bellow is test result of time spends on inflating the balloon to 3GB of
a 4GB idle guest:

a. allocating pages (6.5%, 103ms)
b. sending PFNs to host (68.3%, 787ms)
c. address translation (6.1%, 96ms)
d. madvise (19%, 300ms)

It takes about 1577ms for the whole inflating process to complete. The
test shows that the bottle neck is the stage b and stage d.

If using a bitmap to send the page info instead of the PFNs, we can
reduce the overhead in stage b quite a lot. Furthermore, it's possible
to do the address translation and the madvise with a bulk of pages,
instead of the current page per page way, so the overhead of stage c
and stage d can also be reduced a lot.

This patch is the kernel side implementation which is intended to speed
up the inflating & deflating process by adding a new feature to the
virtio-balloon device. And now, inflating the balloon to 3GB of a 4GB
idle guest only takes 200ms, it's about 8 times as fast as before.

TODO: optimize stage a by allocating/freeing a chunk of pages instead
of a single page at a time.

Signed-off-by: Liang Li 
Suggested-by: Michael S. Tsirkin 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
---
 drivers/virtio/virtio_balloon.c | 164 +++-
 include/uapi/linux/virtio_balloon.h |   1 +
 2 files changed, 144 insertions(+), 21 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 8d649a2..1fa601b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -40,11 +40,19 @@
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
 #define OOM_VBALLOON_DEFAULT_PAGES 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+#define VIRTIO_BALLOON_PFNS_LIMIT ((2 * (1ULL << 30)) >> PAGE_SHIFT) /* 2GB */
 
 static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
 module_param(oom_pages, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
 
+struct balloon_bmap_hdr {
+   __virtio32 id;
+   __virtio32 page_shift;
+   __virtio64 start_pfn;
+   __virtio64 bmap_len;
+};
+
 struct virtio_balloon {
struct virtio_device *vdev;
struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
@@ -62,6 +70,11 @@ struct virtio_balloon {
 
/* Number of balloon pages we've told the Host we're not using. */
unsigned int num_pages;
+   /* Bitmap and length used to tell the host the pages */
+   unsigned long *page_bitmap;
+   unsigned long bmap_len;
+   /* Used to record the processed pfn range */
+   unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
/*
 * The pages we've told the Host we're not using are enqueued
 * at vb_dev_info->pages list.
@@ -105,15 +118,51 @@ static void balloon_ack(struct virtqueue *vq)
wake_up(>acked);
 }
 
+static inline void init_pfn_range(struct virtio_balloon *vb)
+{
+   vb->min_pfn = (1UL << 48);
+   vb->max_pfn = 0;
+}
+
+static inline void update_pfn_range(struct virtio_balloon *vb,
+struct page *page)
+{
+   unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+   if (balloon_pfn < vb->min_pfn)
+   vb->min_pfn = balloon_pfn;
+   if (balloon_pfn > vb->max_pfn)
+   vb->max_pfn = balloon_pfn;
+}
+
 static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
 {
-   struct scatterlist sg;
unsigned int len;
 
-   sg_init_one(, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
+   struct balloon_bmap_hdr hdr;
+   unsigned long bmap_len;
+   struct scatterlist sg[2];
+
+   hdr.id = cpu_to_virtio32(vb->vdev, 0);
+   hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
+   hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
+   bmap_len = min(vb->bmap_len,
+   (vb->end_pfn - vb->start_pfn) / BITS_PER_BYTE);
+   hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
+   sg_init_table(sg, 2);
+   sg_set_buf([0], , sizeof(hdr));
+   sg_set_buf([1], vb->page_bitmap, bmap_len);
+   virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL);
+   } else {
+   struct scatterlist sg;
 
-   /* We should always be able to add one buffer to an empty queue. */
-   virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL);
+   sg_init_one(, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+   /* We should always be able to add one buffer to an
+   * empty queue.
+   */
+   virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL);
+   }

[PATCH 2/6] virtio-balloon: speed up inflate/deflate process

2016-06-13 Thread Liang Li
The implementation of the current virtio-balloon is not very efficient,
Bellow is test result of time spends on inflating the balloon to 3GB of
a 4GB idle guest:

a. allocating pages (6.5%, 103ms)
b. sending PFNs to host (68.3%, 787ms)
c. address translation (6.1%, 96ms)
d. madvise (19%, 300ms)

It takes about 1577ms for the whole inflating process to complete. The
test shows that the bottle neck is the stage b and stage d.

If using a bitmap to send the page info instead of the PFNs, we can
reduce the overhead in stage b quite a lot. Furthermore, it's possible
to do the address translation and the madvise with a bulk of pages,
instead of the current page per page way, so the overhead of stage c
and stage d can also be reduced a lot.

This patch is the kernel side implementation which is intended to speed
up the inflating & deflating process by adding a new feature to the
virtio-balloon device. And now, inflating the balloon to 3GB of a 4GB
idle guest only takes 200ms, it's about 8 times as fast as before.

TODO: optimize stage a by allocating/freeing a chunk of pages instead
of a single page at a time.

Signed-off-by: Liang Li 
Suggested-by: Michael S. Tsirkin 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
---
 drivers/virtio/virtio_balloon.c | 164 +++-
 include/uapi/linux/virtio_balloon.h |   1 +
 2 files changed, 144 insertions(+), 21 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 8d649a2..1fa601b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -40,11 +40,19 @@
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
 #define OOM_VBALLOON_DEFAULT_PAGES 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+#define VIRTIO_BALLOON_PFNS_LIMIT ((2 * (1ULL << 30)) >> PAGE_SHIFT) /* 2GB */
 
 static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
 module_param(oom_pages, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
 
+struct balloon_bmap_hdr {
+   __virtio32 id;
+   __virtio32 page_shift;
+   __virtio64 start_pfn;
+   __virtio64 bmap_len;
+};
+
 struct virtio_balloon {
struct virtio_device *vdev;
struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
@@ -62,6 +70,11 @@ struct virtio_balloon {
 
/* Number of balloon pages we've told the Host we're not using. */
unsigned int num_pages;
+   /* Bitmap and length used to tell the host the pages */
+   unsigned long *page_bitmap;
+   unsigned long bmap_len;
+   /* Used to record the processed pfn range */
+   unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
/*
 * The pages we've told the Host we're not using are enqueued
 * at vb_dev_info->pages list.
@@ -105,15 +118,51 @@ static void balloon_ack(struct virtqueue *vq)
wake_up(>acked);
 }
 
+static inline void init_pfn_range(struct virtio_balloon *vb)
+{
+   vb->min_pfn = (1UL << 48);
+   vb->max_pfn = 0;
+}
+
+static inline void update_pfn_range(struct virtio_balloon *vb,
+struct page *page)
+{
+   unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+   if (balloon_pfn < vb->min_pfn)
+   vb->min_pfn = balloon_pfn;
+   if (balloon_pfn > vb->max_pfn)
+   vb->max_pfn = balloon_pfn;
+}
+
 static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
 {
-   struct scatterlist sg;
unsigned int len;
 
-   sg_init_one(, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
+   struct balloon_bmap_hdr hdr;
+   unsigned long bmap_len;
+   struct scatterlist sg[2];
+
+   hdr.id = cpu_to_virtio32(vb->vdev, 0);
+   hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
+   hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
+   bmap_len = min(vb->bmap_len,
+   (vb->end_pfn - vb->start_pfn) / BITS_PER_BYTE);
+   hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
+   sg_init_table(sg, 2);
+   sg_set_buf([0], , sizeof(hdr));
+   sg_set_buf([1], vb->page_bitmap, bmap_len);
+   virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL);
+   } else {
+   struct scatterlist sg;
 
-   /* We should always be able to add one buffer to an empty queue. */
-   virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL);
+   sg_init_one(, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+   /* We should always be able to add one buffer to an
+   * empty queue.
+   */
+   virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL);
+   }
virtqueue_kick(vq);
 
/* When host has read buffer, this completes via balloon_ack */
@@ -133,13 +182,50 @@ static void