Re: [Xen-devel] [PATCH v2 3/3] tools/libxc: use superpages during restore of HVM guest

2017-08-23 Thread Olaf Hering
On Wed, Aug 23, Olaf Hering wrote:

> The value of p2m_size does not represent the actual number of pages
> assigned to a domU. This info is stored in getdomaininfo.max_pages,
> which is currently not used by restore. I will see if using this value
> will avoid triggering the Over-allocation check.

This untested change on top of this series (done with git diff -w -b
base..HEAD) adds some accounting to avoid the over-allocation:

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index 26c45fdd6d..e0321ea224 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -234,6 +234,8 @@ struct xc_sr_context

         int send_back_fd;
         unsigned long p2m_size;
+        unsigned long max_pages;
+        unsigned long tot_pages;
         xc_hypercall_buffer_t dirty_bitmap_hbuf;

         /* From Image Header. */
@@ -375,6 +377,7 @@ static inline bool xc_sr_bitmap_resize(struct xc_sr_bitmap *bm, unsigned long bi
 static inline void xc_sr_bitmap_free(struct xc_sr_bitmap *bm)
 {
     free(bm->p);
+    bm->p = NULL;
 }

 static inline bool xc_sr_set_bit(unsigned long bit, struct xc_sr_bitmap *bm)
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 1f9fe25b8f..eff24d3805 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -758,6 +758,9 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
         return -1;
     }

+    /* See xc_domain_getinfo */
+    ctx.restore.max_pages = ctx.dominfo.max_memkb >> (PAGE_SHIFT-10);
+    ctx.restore.tot_pages = ctx.dominfo.nr_pages;
     ctx.restore.p2m_size = nr_pfns;

     if ( ctx.dominfo.hvm )
diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
index 60454148db..f2932dafb7 100644
--- a/tools/libxc/xc_sr_restore_x86_hvm.c
+++ b/tools/libxc/xc_sr_restore_x86_hvm.c
@@ -278,7 +278,8 @@ static int pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn)
 static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
 {
     xc_interface *xch = ctx->xch;
-    bool success = false;
+    struct xc_sr_bitmap *bm;
+    bool success = false, do_sp;
     int rc = -1, done;
     unsigned int order;
     unsigned long i;
@@ -303,15 +304,18 @@ static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
         return -1;
     }
     DPRINTF("idx_1g %lu idx_2m %lu\n", idx_1g, idx_2m);
-    if (!xc_sr_test_and_set_bit(idx_1g, &ctx->x86_hvm.restore.attempted_1g)) {
+
+    bm = &ctx->x86_hvm.restore.attempted_1g;
     order = SUPERPAGE_1GB_SHIFT;
     count = 1UL << order;
+    do_sp = ctx->restore.tot_pages + count <= ctx->restore.max_pages;
+    if ( do_sp && !xc_sr_test_and_set_bit(idx_1g, bm) ) {
         base_pfn = (pfn >> order) << order;
         extnt = base_pfn;
         done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extnt);
         DPRINTF("1G base_pfn %" PRI_xen_pfn " done %d\n", base_pfn, done);
         if ( done > 0 ) {
-            struct xc_sr_bitmap *bm = &ctx->x86_hvm.restore.attempted_2m;
+            bm = &ctx->x86_hvm.restore.attempted_2m;
             success = true;
             stat_1g = done;
             for ( i = 0; i < (count >> SUPERPAGE_2MB_SHIFT); i++ )
@@ -319,9 +323,11 @@ static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
         }
     }

-    if (!xc_sr_test_and_set_bit(idx_2m, &ctx->x86_hvm.restore.attempted_2m)) {
+    bm = &ctx->x86_hvm.restore.attempted_2m;
     order = SUPERPAGE_2MB_SHIFT;
     count = 1UL << order;
+    do_sp = ctx->restore.tot_pages + count <= ctx->restore.max_pages;
+    if ( do_sp && !xc_sr_test_and_set_bit(idx_2m, bm) ) {
         base_pfn = (pfn >> order) << order;
         extnt = base_pfn;
         done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extnt);
@@ -344,6 +350,7 @@ static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
     if ( success == true ) {
         do {
             count--;
+            ctx->restore.tot_pages++;
             rc = pfn_set_allocated(ctx, base_pfn + count);
             if ( rc )
                 break;
@@ -396,6 +403,7 @@ static int x86_hvm_populate_pfns(struct xc_sr_context *ctx, unsigned count,
                 PERROR("Failed to release pfn %" PRI_xen_pfn, min_pfn);
                 goto err;
             }
+            ctx->restore.tot_pages--;
         }
         min_pfn++;
     }

Olaf




Re: [Xen-devel] [PATCH v2 3/3] tools/libxc: use superpages during restore of HVM guest

2017-08-23 Thread Olaf Hering
On Wed, Aug 23, Wei Liu wrote:

> On Tue, Aug 22, 2017 at 05:53:25PM +0200, Olaf Hering wrote:
> > In my testing I have seen the case of over-allocation. That's why I
> > implemented the freeing of unpopulated parts. It would be nice to know
> > how many pages are actually coming. I think this info is not available.
> Not sure I follow. What do you mean by "how many pages are actually
> coming"?

By that I meant the expected number of pages that will have to be populated.

The value of p2m_size does not represent the actual number of pages
assigned to a domU. This info is stored in getdomaininfo.max_pages,
which is currently not used by restore. I will see if using this value
will avoid triggering the Over-allocation check.

> > On the other hand, the first iteration sends the pfns linearly. This is
> > when the allocation actually happens. So the over-allocation will only
> > trigger near the end, if a 1G range is allocated but only a few pages
> > will be stored into this range.
> This could be making too many assumptions on the data stream.

By using max_pages some of these assumptions can be avoided.
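
Roughly like this (sketch only, the helper names are made up; the real change
is the diff posted earlier in this thread):

/* Derive the page budget from the data xc_domain_getinfo() already provides. */
static void restore_init_budget(struct xc_sr_context *ctx)
{
    /* max_memkb is in KiB; convert it to a number of 4k pages. */
    ctx->restore.max_pages = ctx->dominfo.max_memkb >> (PAGE_SHIFT - 10);
    /* Pages already assigned to the domU when the restore starts. */
    ctx->restore.tot_pages = ctx->dominfo.nr_pages;
}

/* Attempt a superpage of the given order only if it fits into the budget. */
static bool superpage_fits_budget(struct xc_sr_context *ctx, unsigned int order)
{
    unsigned long count = 1UL << order;

    return ctx->restore.tot_pages + count <= ctx->restore.max_pages;
}

With such a check a 1G or 2M extent is simply skipped instead of
over-allocating, independent of the order in which the pfns arrive.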

Olaf




Re: [Xen-devel] [PATCH v2 3/3] tools/libxc: use superpages during restore of HVM guest

2017-08-23 Thread Wei Liu
On Wed, Aug 23, 2017 at 10:05:37AM +0200, Olaf Hering wrote:
> On Tue, Aug 22, Olaf Hering wrote:
> 
> > In my testing I have seen the case of over-allocation. That's why I
> > implemented the freeing of unpopulated parts. It would be nice to know
> > how many pages are actually coming. I think this info is not available.
> 
> If the receiving dom0 recognizes an over-allocation it must know how
> much memory a domU is supposed to have. Perhaps there is a way to
> retrieve this info.
> 

Dom0 probably gets an error from Xen about failing to allocate memory,
but I'm not sure it can tell whether that is due to the DomU trying to use
more than it should or to Xen itself being out of memory.

> An interesting case is ballooning during migration. Is the new number of
> pages per domU actually transferred to the receiving side? If the domU is
> ballooned up the other side may see the incoming domU as over-allocated.
> If it is ballooned down pages may be missing. Was this ever considered?
> 

No, I don't think that's covered.



Re: [Xen-devel] [PATCH v2 3/3] tools/libxc: use superpages during restore of HVM guest

2017-08-23 Thread Wei Liu
On Tue, Aug 22, 2017 at 05:53:25PM +0200, Olaf Hering wrote:
> On Tue, Aug 22, Wei Liu wrote:
> 
> > On Thu, Aug 17, 2017 at 07:01:33PM +0200, Olaf Hering wrote:
> > > +/* No superpage in 1st 2MB due to VGA hole */
> > > +xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g);
> > > +xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m);
> > I don't quite get this. What about other holes such as MMIO?
> 
> This just copies what meminit_hvm does. Is there a way to know where the
> MMIO hole is? Maybe I just missed the MMIO part. In the worst case I
> think a super page is allocated, which is later split into single pages.

That's a bit different from migration. IIRC hvmloader is responsible for
shuffling pages around. I don't think that is applicable to migration.
> 
> > One potential issue I can see with your algorithm is that, if the stream of
> > page info contains pages from different superpages, the risk of going
> > over the memory limit is high (hence failing the migration).
> > 
> > Is my concern unfounded?
> 
> In my testing I have seen the case of over-allocation. That's why I
> implemented the freeing of unpopulated parts. It would be nice to know
> how many pages are actually coming. I think this info is not available.
> 

Not sure I follow. What do you mean by "how many pages are actually
coming"?

> On the other hand, the first iteration sends the pfns linearly. This is
> when the allocation actually happens. So the over-allocation will only
> trigger near the end, if a 1G range is allocated but only a few pages
> will be stored into this range.

This could be making too many assumptions on the data stream.



Re: [Xen-devel] [PATCH v2 3/3] tools/libxc: use superpages during restore of HVM guest

2017-08-23 Thread Olaf Hering
On Tue, Aug 22, Olaf Hering wrote:

> In my testing I have seen the case of over-allocation. That's why I
> implemented the freeing of unpopulated parts. It would be nice to know
> how many pages are actually coming. I think this info is not available.

If the receiving dom0 recognizes an over-allocation it must know how
much memory a domU is supposed to have. Perhaps there is a way to
retrieve this info.

An interesting case is ballooning during migration. Is the new number of
pages per domU actually transferred to the receiving side? If the domU is
ballooned up the other side may see the incoming domU as over-allocated.
If it is ballooned down pages may be missing. Was this ever considered?


Olaf




Re: [Xen-devel] [PATCH v2 3/3] tools/libxc: use superpages during restore of HVM guest

2017-08-22 Thread Olaf Hering
On Tue, Aug 22, Wei Liu wrote:

> On Thu, Aug 17, 2017 at 07:01:33PM +0200, Olaf Hering wrote:
> > +/* No superpage in 1st 2MB due to VGA hole */
> > +xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g);
> > +xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m);
> I don't quite get this. What about other holes such as MMIO?

This just copies what meminit_hvm does. Is there a way to know where the
MMIO hole is? Maybe I just missed the MMIO part. In the worst case I
think a super page is allocated, which is later split into single pages.

> One potential issue I can see with your algorithm is that, if the stream of
> page info contains pages from different superpages, the risk of going
> over the memory limit is high (hence failing the migration).
> 
> Is my concern unfounded?

In my testing I have seen the case of over-allocation. That's why I
implemented the freeing of unpopulated parts. It would be nice to know
how many pages are actually coming. I think this info is not available.

On the other hand, the first iteration sends the pfns linearly. This is
when the allocation actually happens. So the over-allocation will only
trigger near the end, if a 1G range is allocated but only a few pages
will be stored into this range.
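
As a concrete example: a domU with 3968 MiB ends 896 MiB into its last 1G
range, so allocating that final 1G superpage over-populates by 128 MiB, which
then has to be released again. The freeing of unpopulated parts could look
roughly like this (sketch only; it assumes xc_domain_decrease_reservation_exact()
is what hands single pages back, together with the pfn_is_populated() /
allocated_pfns tracking from this series):

/* Release pfns that were populated as part of a superpage but never
 * received page data from the stream. */
static int release_unpopulated(struct xc_sr_context *ctx,
                               xen_pfn_t min_pfn, xen_pfn_t max_pfn)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t pfn;

    for ( pfn = min_pfn; pfn <= max_pfn; pfn++ )
    {
        /* Keep pages that got data, skip pages that were never allocated. */
        if ( pfn_is_populated(ctx, pfn) ||
             !xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
            continue;

        if ( xc_domain_decrease_reservation_exact(xch, ctx->domid, 1, 0, &pfn) )
        {
            PERROR("Failed to release pfn %" PRI_xen_pfn, pfn);
            return -1;
        }
    }

    return 0;
}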

Olaf




Re: [Xen-devel] [PATCH v2 3/3] tools/libxc: use superpages during restore of HVM guest

2017-08-22 Thread Wei Liu
On Thu, Aug 17, 2017 at 07:01:33PM +0200, Olaf Hering wrote:
[...]
> diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
> index 1dca85354a..60454148db 100644
> --- a/tools/libxc/xc_sr_restore_x86_hvm.c
> +++ b/tools/libxc/xc_sr_restore_x86_hvm.c
> @@ -135,6 +135,8 @@ static int x86_hvm_localise_page(struct xc_sr_context *ctx,
>  static int x86_hvm_setup(struct xc_sr_context *ctx)
>  {
>  xc_interface *xch = ctx->xch;
> +struct xc_sr_bitmap *bm;
> +unsigned long bits;
>  
>  if ( ctx->restore.guest_type != DHDR_TYPE_X86_HVM )
>  {
> @@ -149,7 +151,30 @@ static int x86_hvm_setup(struct xc_sr_context *ctx)
>  return -1;
>  }
>  
> +bm = &ctx->x86_hvm.restore.attempted_1g;
> +bits = (ctx->restore.p2m_size >> SUPERPAGE_1GB_SHIFT) + 1;
> +if ( xc_sr_bitmap_resize(bm, bits) == false )
> +goto out;
> +
> +bm = &ctx->x86_hvm.restore.attempted_2m;
> +bits = (ctx->restore.p2m_size >> SUPERPAGE_2MB_SHIFT) + 1;
> +if ( xc_sr_bitmap_resize(bm, bits) == false )
> +goto out;
> +
> +bm = &ctx->x86_hvm.restore.allocated_pfns;
> +bits = ctx->restore.p2m_size + 1;
> +if ( xc_sr_bitmap_resize(bm, bits) == false )
> +goto out;
> +
> +/* No superpage in 1st 2MB due to VGA hole */
> +xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g);
> +xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m);
> +

I don't quite get this. What about other holes such as MMIO?

>  return 0;
> +
> +out:
> +ERROR("Unable to allocate memory for pfn bitmaps");
> +return -1;
>  }
>  
>  /*
> @@ -224,10 +249,164 @@ static int x86_hvm_stream_complete(struct xc_sr_context *ctx)
>  static int x86_hvm_cleanup(struct xc_sr_context *ctx)
>  {
>  free(ctx->x86_hvm.restore.context);
> +xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_1g);
> +xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_2m);
> +xc_sr_bitmap_free(&ctx->x86_hvm.restore.allocated_pfns);
>  
>  return 0;
>  }
>  
> +/*
> + * Set a pfn as allocated, expanding the tracking structures if needed.
> + */
> +static int pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn)
> +{
> +xc_interface *xch = ctx->xch;
> +
> +if ( !xc_sr_set_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
> +{
> +ERROR("Failed to realloc allocated_pfns bitmap");
> +errno = ENOMEM;
> +return -1;
> +}
> +return 0;
> +}
> +
> +/*
> + * Attempt to allocate a superpage where the pfn resides.
> + */
> +static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
> +{
> +xc_interface *xch = ctx->xch;
> +bool success = false;
> +int rc = -1, done;
> +unsigned int order;
> +unsigned long i;
> +unsigned long stat_1g = 0, stat_2m = 0, stat_4k = 0;
> +unsigned long idx_1g, idx_2m;
> +unsigned long count;
> +xen_pfn_t base_pfn = 0, extnt;
> +
> +if (xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns))

Style is wrong here and in some other places.

> +return 0;
> +
> +idx_1g = pfn >> SUPERPAGE_1GB_SHIFT;
> +idx_2m = pfn >> SUPERPAGE_2MB_SHIFT;
> +if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_1g, idx_1g))
> +{
> +PERROR("Failed to realloc attempted_1g");
> +return -1;
> +}
> +if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_2m, idx_2m))
> +{
> +PERROR("Failed to realloc attempted_2m");
> +return -1;
> +}
> +DPRINTF("idx_1g %lu idx_2m %lu\n", idx_1g, idx_2m);
> +if (!xc_sr_test_and_set_bit(idx_1g, &ctx->x86_hvm.restore.attempted_1g)) {
> +order = SUPERPAGE_1GB_SHIFT;
> +count = 1UL << order;
> +base_pfn = (pfn >> order) << order;
> +extnt = base_pfn;
> +done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extnt);
> +DPRINTF("1G base_pfn %" PRI_xen_pfn " done %d\n", base_pfn, done);
> +if (done > 0) {
> +struct xc_sr_bitmap *bm = &ctx->x86_hvm.restore.attempted_2m;
> +success = true;
> +stat_1g = done;
> +for (i = 0; i < (count >> SUPERPAGE_2MB_SHIFT); i++)
> +xc_sr_set_bit((base_pfn >> SUPERPAGE_2MB_SHIFT) + i, bm);
> +}
> +}
> +
> +if (!xc_sr_test_and_set_bit(idx_2m, &ctx->x86_hvm.restore.attempted_2m)) {
> +order = SUPERPAGE_2MB_SHIFT;
> +count = 1UL << order;
> +base_pfn = (pfn >> order) << order;
> +extnt = base_pfn;
> +done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extnt);
> +DPRINTF("2M base_pfn %" PRI_xen_pfn " done %d\n", base_pfn, done);
> +if (done > 0) {
> +success = true;
> +stat_2m = done;
> +}
> +}
> +if (success == false) {
> +count = 1;
> +extnt = base_pfn = pfn;
> +done = xc_domain_populate_physmap(xch, ctx->domid, count, 0, 0, &extnt);
> +

[Xen-devel] [PATCH v2 3/3] tools/libxc: use superpages during restore of HVM guest

2017-08-17 Thread Olaf Hering
During creation of a HVM domU, meminit_hvm() tries to map superpages.
After save/restore or migration this mapping is lost: everything is
allocated in single pages. This causes a performance degradation after
migration.

Add the necessary code to preallocate a superpage for the chunk of pfns
that is received. In case a pfn was not populated on the sending side, it
must be freed on the receiving side to avoid over-allocation.

The existing code for x86_pv is moved unmodified into its own file.

Signed-off-by: Olaf Hering 
---
 tools/libxc/xc_sr_common.h  |  15 +++
 tools/libxc/xc_sr_restore.c |  70 +-
 tools/libxc/xc_sr_restore_x86_hvm.c | 180 
 tools/libxc/xc_sr_restore_x86_pv.c  |  72 ++-
 4 files changed, 267 insertions(+), 70 deletions(-)
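
The core of the HVM path is x86_hvm_allocate_pfn(): for each incoming pfn it
tries the largest extent that has not been attempted yet and falls back to
smaller ones. A condensed, illustrative sketch of that control flow (not the
literal patch code; error reporting, statistics, bitmap resizing and the
VGA-hole special case are omitted):

static int allocate_around_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    xc_interface *xch = ctx->xch;
    unsigned int order = 0;
    unsigned long i, count;
    xen_pfn_t base_pfn, extnt;
    int done = 0;

    /* A previous superpage attempt may already back this pfn. */
    if ( xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
        return 0;

    /* Try a 1G extent, once per 1G-aligned range. */
    if ( !xc_sr_test_and_set_bit(pfn >> SUPERPAGE_1GB_SHIFT,
                                 &ctx->x86_hvm.restore.attempted_1g) )
    {
        order = SUPERPAGE_1GB_SHIFT;
        extnt = base_pfn = (pfn >> order) << order;
        done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extnt);
    }

    /* Fall back to a 2M extent, again once per range. */
    if ( done <= 0 &&
         !xc_sr_test_and_set_bit(pfn >> SUPERPAGE_2MB_SHIFT,
                                 &ctx->x86_hvm.restore.attempted_2m) )
    {
        order = SUPERPAGE_2MB_SHIFT;
        extnt = base_pfn = (pfn >> order) << order;
        done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extnt);
    }

    /* Last resort: a single 4k page for exactly this pfn. */
    if ( done <= 0 )
    {
        order = 0;
        extnt = base_pfn = pfn;
        done = xc_domain_populate_physmap(xch, ctx->domid, 1, 0, 0, &extnt);
    }

    if ( done <= 0 )
        return -1;

    /* Record which pfns are now backed so they are not allocated twice. */
    count = 1UL << order;
    for ( i = 0; i < count; i++ )
        if ( !xc_sr_set_bit(base_pfn + i, &ctx->x86_hvm.restore.allocated_pfns) )
            return -1;

    return 0;
}

Pages allocated this way but never referenced by the stream are freed again
before the restore completes, which keeps the domain within its allocation.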

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index 5d78f461af..26c45fdd6d 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -139,6 +139,16 @@ struct xc_sr_restore_ops
  */
 int (*setup)(struct xc_sr_context *ctx);
 
+/**
+ * Populate PFNs
+ *
+ * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+ * unpopulated subset.
+ */
+int (*populate_pfns)(struct xc_sr_context *ctx, unsigned count,
+ const xen_pfn_t *original_pfns, const uint32_t *types);
+
+
 /**
  * Process an individual record from the stream.  The caller shall take
  * care of processing common records (e.g. END, PAGE_DATA).
@@ -336,6 +346,11 @@ struct xc_sr_context
 /* HVM context blob. */
 void *context;
 size_t contextsz;
+
+/* Bitmap of currently allocated PFNs during restore. */
+struct xc_sr_bitmap attempted_1g;
+struct xc_sr_bitmap attempted_2m;
+struct xc_sr_bitmap allocated_pfns;
 } restore;
 };
 } x86_hvm;
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index d53948e1a6..1f9fe25b8f 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -68,74 +68,6 @@ static int read_headers(struct xc_sr_context *ctx)
 return 0;
 }
 
-/*
- * Given a set of pfns, obtain memory from Xen to fill the physmap for the
- * unpopulated subset.  If types is NULL, no page type checking is performed
- * and all unpopulated pfns are populated.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned count,
-  const xen_pfn_t *original_pfns, const uint32_t *types)
-{
-xc_interface *xch = ctx->xch;
-xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
-*pfns = malloc(count * sizeof(*pfns));
-unsigned i, nr_pfns = 0;
-int rc = -1;
-
-if ( !mfns || !pfns )
-{
-ERROR("Failed to allocate %zu bytes for populating the physmap",
-  2 * count * sizeof(*mfns));
-goto err;
-}
-
-for ( i = 0; i < count; ++i )
-{
-if ( (!types || (types &&
- (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
-  types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
- !pfn_is_populated(ctx, original_pfns[i]) )
-{
-rc = pfn_set_populated(ctx, original_pfns[i]);
-if ( rc )
-goto err;
-pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
-++nr_pfns;
-}
-}
-
-if ( nr_pfns )
-{
-rc = xc_domain_populate_physmap_exact(
-xch, ctx->domid, nr_pfns, 0, 0, mfns);
-if ( rc )
-{
-PERROR("Failed to populate physmap");
-goto err;
-}
-
-for ( i = 0; i < nr_pfns; ++i )
-{
-if ( mfns[i] == INVALID_MFN )
-{
-ERROR("Populate physmap failed for pfn %u", i);
-rc = -1;
-goto err;
-}
-
-ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
-}
-}
-
-rc = 0;
-
- err:
-free(pfns);
-free(mfns);
-
-return rc;
-}
-
 /*
  * Given a list of pfns, their types, and a block of page data from the
  * stream, populate and record their types, map the relevant subset and copy
@@ -161,7 +93,7 @@ static int process_page_data(struct xc_sr_context *ctx, unsigned count,
 goto err;
 }
 
-rc = populate_pfns(ctx, count, pfns, types);
+rc = ctx->restore.ops.populate_pfns(ctx, count, pfns, types);
 if ( rc )
 {
 ERROR("Failed to populate pfns for batch of %u pages", count);
diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
index 1dca85354a..60454148db 100644
--- a/tools/libxc/xc_sr_restore_x86_hvm.c
+++ b/tools/libxc/xc_sr_restore_x86_hvm.c
@@ -135,6 +135,8 @@ static int x86_hvm_localise_page(struct xc_sr_context *ctx,
 static int x86_hvm_setup(struct xc_sr_context *ctx)
 {
 xc_int