Re: [PATCH v2] virtiofsd/passthrough_ll: don't remove O_DIRECT when cache=none

2020-04-20 Thread Catherine Ho
Ping :)

---
B.R.
Catherine

On Sat, 11 Apr 2020 at 16:41, Catherine Ho  wrote:
>
> cache=none means to bypass host cache. So we can't remove O_DIRECT flag in
> unconditionally in update_open_flags();
>
> Signed-off-by: Catherine Ho 
> ---
> v2: Fix to keep flags unchanged if cache=none, otherwise changed the file
> without O_DIRECT incorrectly.
>  tools/virtiofsd/passthrough_ll.c |   14 --
>  1 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/tools/virtiofsd/passthrough_ll.c 
> b/tools/virtiofsd/passthrough_ll.c
> index 4c35c95..59e64dd 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -1677,7 +1677,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t 
> ino,
>  fuse_reply_err(req, 0);
>  }
>
> -static void update_open_flags(int writeback, struct fuse_file_info *fi)
> +static void update_open_flags(int writeback, int cache_mode,
> +  struct fuse_file_info *fi)
>  {
>  /*
>   * With writeback cache, kernel may send read requests even
> @@ -1702,10 +1703,11 @@ static void update_open_flags(int writeback, struct 
> fuse_file_info *fi)
>
>  /*
>   * O_DIRECT in guest should not necessarily mean bypassing page
> - * cache on host as well. If somebody needs that behavior, it
> - * probably should be a configuration knob in daemon.
> + * cache on host as well. If cache=none, keep the flag unchanged
>   */
> -fi->flags &= ~O_DIRECT;
> +if (cache_mode != CACHE_NONE) {
> +fi->flags &= ~O_DIRECT;
> +}
>  }
>
>  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
> @@ -1737,7 +1739,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t 
> parent, const char *name,
>  goto out;
>  }
>
> -update_open_flags(lo->writeback, fi);
> +update_open_flags(lo->writeback, lo->cache, fi);
>
>  fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
>  mode);
> @@ -1947,7 +1949,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, 
> struct fuse_file_info *fi)
>  fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
>   fi->flags);
>
> -update_open_flags(lo->writeback, fi);
> +update_open_flags(lo->writeback, lo->cache, fi);
>
>  sprintf(buf, "%i", lo_fd(req, ino));
>  fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
> --
> 1.7.1
>



[PATCH v2] virtiofsd/passthrough_ll: don't remove O_DIRECT when cache=none

2020-04-11 Thread Catherine Ho
cache=none means to bypass host cache. So we can't remove O_DIRECT flag in
unconditionally in update_open_flags();

Signed-off-by: Catherine Ho 
---
v2: Fix to keep flags unchanged if cache=none, otherwise changed the file
without O_DIRECT incorrectly.
 tools/virtiofsd/passthrough_ll.c |   14 --
 1 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 4c35c95..59e64dd 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1677,7 +1677,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
 fuse_reply_err(req, 0);
 }
 
-static void update_open_flags(int writeback, struct fuse_file_info *fi)
+static void update_open_flags(int writeback, int cache_mode,
+  struct fuse_file_info *fi)
 {
 /*
  * With writeback cache, kernel may send read requests even
@@ -1702,10 +1703,11 @@ static void update_open_flags(int writeback, struct 
fuse_file_info *fi)
 
 /*
  * O_DIRECT in guest should not necessarily mean bypassing page
- * cache on host as well. If somebody needs that behavior, it
- * probably should be a configuration knob in daemon.
+ * cache on host as well. If cache=none, keep the flag unchanged
  */
-fi->flags &= ~O_DIRECT;
+if (cache_mode != CACHE_NONE) {
+fi->flags &= ~O_DIRECT;
+}
 }
 
 static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
@@ -1737,7 +1739,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, 
const char *name,
 goto out;
 }
 
-update_open_flags(lo->writeback, fi);
+update_open_flags(lo->writeback, lo->cache, fi);
 
 fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
 mode);
@@ -1947,7 +1949,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, 
struct fuse_file_info *fi)
 fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
  fi->flags);
 
-update_open_flags(lo->writeback, fi);
+update_open_flags(lo->writeback, lo->cache, fi);
 
 sprintf(buf, "%i", lo_fd(req, ino));
 fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
-- 
1.7.1




[PATCH] virtiofsd/passthrough_ll: don't remove O_DIRECT when cache=none

2020-04-11 Thread Catherine Ho
cache=none means to bypass host cache. So we can't remove O_DIRECT flag in
unconditionally in update_open_flags();

Signed-off-by: Catherine Ho 
---
 tools/virtiofsd/passthrough_ll.c |   16 ++--
 1 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 4c35c95..889e144 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1677,7 +1677,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
 fuse_reply_err(req, 0);
 }
 
-static void update_open_flags(int writeback, struct fuse_file_info *fi)
+static void update_open_flags(int writeback, int cache_mode,
+  struct fuse_file_info *fi)
 {
 /*
  * With writeback cache, kernel may send read requests even
@@ -1702,10 +1703,13 @@ static void update_open_flags(int writeback, struct 
fuse_file_info *fi)
 
 /*
  * O_DIRECT in guest should not necessarily mean bypassing page
- * cache on host as well. If somebody needs that behavior, it
- * probably should be a configuration knob in daemon.
+ * cache on host as well. If cache=none, set the flag to O_DIRECT
  */
-fi->flags &= ~O_DIRECT;
+if (cache_mode == CACHE_NONE) {
+fi->flags |= O_DIRECT;
+} else {
+fi->flags &= ~O_DIRECT;
+}
 }
 
 static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
@@ -1737,7 +1741,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, 
const char *name,
 goto out;
 }
 
-update_open_flags(lo->writeback, fi);
+update_open_flags(lo->writeback, lo->cache, fi);
 
 fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
 mode);
@@ -1947,7 +1951,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, 
struct fuse_file_info *fi)
 fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
  fi->flags);
 
-update_open_flags(lo->writeback, fi);
+update_open_flags(lo->writeback, lo->cache, fi);
 
 sprintf(buf, "%i", lo_fd(req, ino));
 fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
-- 
1.7.1




[PATCH] target/i386: skip kvm_msr_entry_add when kvm_vmx_basic is 0

2019-12-06 Thread Catherine Ho
Commit 1389309c811b ("KVM: nVMX: expose VMX capabilities for nested
hypervisors to userspace") expands the msr_based_features with
MSR_IA32_VMX_BASIC and others. Then together with an old kernel before
1389309c811b, the qemu call KVM_GET_MSR_FEATURE_INDEX_LIST and got the
smaller kvm_feature_msrs. Then in kvm_arch_get_supported_msr_feature(),
searching VMX_BASIC will be failed and return 0. At last kvm_vmx_basic
will be assigned to 0.

Without this patch, it will cause a qemu crash (host kernel 4.15
ubuntu 18.04+qemu 4.1):
qemu-system-x86_64: error: failed to set MSR 0x480 to 0x0
target/i386/kvm.c:2932: kvm_put_msrs: Assertion `ret ==
cpu->kvm_msr_buf->nmsrs' failed.

This fixes it by skipping kvm_msr_entry_add when kvm_vmx_basic is 0

Cc: Paolo Bonzini 
Signed-off-by: Catherine Ho 
---
 target/i386/kvm.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index a8c44bf..8cf84a2 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -2632,8 +2632,13 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, 
FeatureWordArray f)
  f[FEAT_VMX_SECONDARY_CTLS]));
 kvm_msr_entry_add(cpu, MSR_IA32_VMX_EPT_VPID_CAP,
   f[FEAT_VMX_EPT_VPID_CAPS] | fixed_vmx_ept_vpid);
-kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC,
+
+if (kvm_vmx_basic) {
+   /* Only add the entry when host supports it */
+kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC,
   f[FEAT_VMX_BASIC] | fixed_vmx_basic);
+}
+
 kvm_msr_entry_add(cpu, MSR_IA32_VMX_MISC,
   f[FEAT_VMX_MISC] | fixed_vmx_misc);
 if (has_msr_vmx_vmfunc) {
-- 
1.7.1




Re: [PATCH] target/i386: skip kvm_msr_entry_add when kvm_vmx_basic is 0

2019-12-06 Thread Catherine Ho
Hi Paolo and Eduardo
I digged into the put msr assertion bug a little more, and seems I
found the root cause.
Please have a review.

Best regards.
Catherine

On Fri, 6 Dec 2019 at 18:25, Catherine Ho  wrote:
>
> Commit 1389309c811b ("KVM: nVMX: expose VMX capabilities for nested
> hypervisors to userspace") expands the msr_based_features with
> MSR_IA32_VMX_BASIC and others. Then together with an old kernel before
> 1389309c811b, the qemu call KVM_GET_MSR_FEATURE_INDEX_LIST and got the
> smaller kvm_feature_msrs. Then in kvm_arch_get_supported_msr_feature(),
> searching VMX_BASIC will be failed and return 0. At last kvm_vmx_basic
> will be assigned to 0.
>
> Without this patch, it will cause a qemu crash (host kernel 4.15
> ubuntu 18.04+qemu 4.1):
> qemu-system-x86_64: error: failed to set MSR 0x480 to 0x0
> target/i386/kvm.c:2932: kvm_put_msrs: Assertion `ret ==
> cpu->kvm_msr_buf->nmsrs' failed.
>
> This fixes it by skipping kvm_msr_entry_add when kvm_vmx_basic is 0
>
> Cc: Paolo Bonzini 
> Signed-off-by: Catherine Ho 
> ---
>  target/i386/kvm.c |7 ++-
>  1 files changed, 6 insertions(+), 1 deletions(-)
>
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index a8c44bf..8cf84a2 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -2632,8 +2632,13 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, 
> FeatureWordArray f)
>   f[FEAT_VMX_SECONDARY_CTLS]));
>  kvm_msr_entry_add(cpu, MSR_IA32_VMX_EPT_VPID_CAP,
>f[FEAT_VMX_EPT_VPID_CAPS] | fixed_vmx_ept_vpid);
> -kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC,
> +
> +if (kvm_vmx_basic) {
> +   /* Only add the entry when host supports it */
> +kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC,
>f[FEAT_VMX_BASIC] | fixed_vmx_basic);
> +}
> +
>  kvm_msr_entry_add(cpu, MSR_IA32_VMX_MISC,
>f[FEAT_VMX_MISC] | fixed_vmx_misc);
>  if (has_msr_vmx_vmfunc) {
> --
> 1.7.1
>



Re: [PATCH] target/i386: relax assert when old host kernels don't include msrs

2019-12-04 Thread Catherine Ho
Hi Paolo


On Wed, 4 Dec 2019 at 21:53, Paolo Bonzini  wrote:
>
> On 04/12/19 14:33, Catherine Ho wrote:
> > Hi Paolo
> > [sorry to resend it, seems to reply it incorrectly]
> >
> > On Wed, 4 Dec 2019 at 19:23, Paolo Bonzini  > <mailto:pbonz...@redhat.com>> wrote:
> >
> > On 04/12/19 09:50, Catherine Ho wrote:
> > > Commit 20a78b02d315 ("target/i386: add VMX features") unconditionally
> > > add vmx msr entry although older host kernels don't include them.
> > >
> > > But old host kernel + newest qemu will cause a qemu crash as follows:
> > > qemu-system-x86_64: error: failed to set MSR 0x480 to 0x0
> > > target/i386/kvm.c:2932: kvm_put_msrs: Assertion `ret ==
> > > cpu->kvm_msr_buf->nmsrs' failed.
> > >
> > > This fixes it by relaxing the condition.
> >
> > This is intentional.  The VMX MSR entries should not have been added.
> > What combination of host kernel/QEMU are you using, and what QEMU
> > command line?
> >
> >
> > Host kernel: 4.15.0 (ubuntu 18.04)
> > Qemu: https://gitlab.com/virtio-fs/qemu/tree/virtio-fs-dev
> > cmdline: qemu-system-x86_64 -M pc -cpu host --enable-kvm -smp 8 \
> >   -m 4G,maxmem=4G
> >
> > But before 20a78b02d315, the older kernel + latest qemu can boot guest
> > successfully.
>
> Ok, so the problem is that some MSR didn't exist in that version.  Which
I thought in my platform, the only MSR didn't exist is MSR_IA32_VMX_BASIC
(0x480). If I remove this kvm_msr_entry_add(), everything is ok, the guest can
be boot up successfully.

> one it is?  Can you make it conditional, similar to MSR_IA32_VMX_VMFUNC?
Ok, I will. Thanks for the suggestion

Best regards
Catherine



Re: [PATCH] target/i386: relax assert when old host kernels don't include msrs

2019-12-04 Thread Catherine Ho
Hi Paolo
[sorry to resend it, seems to reply it incorrectly]

On Wed, 4 Dec 2019 at 19:23, Paolo Bonzini  wrote:

> On 04/12/19 09:50, Catherine Ho wrote:
> > Commit 20a78b02d315 ("target/i386: add VMX features") unconditionally
> > add vmx msr entry although older host kernels don't include them.
> >
> > But old host kernel + newest qemu will cause a qemu crash as follows:
> > qemu-system-x86_64: error: failed to set MSR 0x480 to 0x0
> > target/i386/kvm.c:2932: kvm_put_msrs: Assertion `ret ==
> > cpu->kvm_msr_buf->nmsrs' failed.
> >
> > This fixes it by relaxing the condition.
>
> This is intentional.  The VMX MSR entries should not have been added.
> What combination of host kernel/QEMU are you using, and what QEMU
> command line?
>
>
> Host kernel: 4.15.0 (ubuntu 18.04)
Qemu: https://gitlab.com/virtio-fs/qemu/tree/virtio-fs-dev
cmdline: qemu-system-x86_64 -M pc -cpu host --enable-kvm -smp 8 \
  -m 4G,maxmem=4G

But before 20a78b02d315, the older kernel + latest qemu can boot guest
successfully.

Best Regards,
Catherine


[PATCH] target/i386: relax assert when old host kernels don't include msrs

2019-12-04 Thread Catherine Ho
Commit 20a78b02d315 ("target/i386: add VMX features") unconditionally
add vmx msr entry although older host kernels don't include them.

But old host kernel + newest qemu will cause a qemu crash as follows:
qemu-system-x86_64: error: failed to set MSR 0x480 to 0x0
target/i386/kvm.c:2932: kvm_put_msrs: Assertion `ret ==
cpu->kvm_msr_buf->nmsrs' failed.

This fixes it by relaxing the condition.

Cc: Paolo Bonzini 
Signed-off-by: Catherine Ho 
---
 target/i386/kvm.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index bf16556..a8c44bf 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -2936,7 +2936,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
  (uint32_t)e->index, (uint64_t)e->data);
 }
 
-assert(ret == cpu->kvm_msr_buf->nmsrs);
+assert(ret <= cpu->kvm_msr_buf->nmsrs);
 return 0;
 }
 
-- 
1.7.1




Re: [Qemu-devel] [PATCH v4] migration: do not rom_reset() during incoming migration

2019-08-14 Thread Catherine Ho
Hi Paolo
Ping, is any other comment I hadn't addressed?

Cheers
Catherine

On Thu, 6 Jun 2019 at 02:31, Dr. David Alan Gilbert 
wrote:

> Paolo, can you take this one please.
>
> * Catherine Ho (catherine.h...@gmail.com) wrote:
> > Commit 18269069c310 ("migration: Introduce ignore-shared capability")
> > addes ignore-shared capability to bypass the shared ramblock (e,g,
> > membackend + numa node). It does good to live migration.
> >
> > As told by Yury,this commit expectes that QEMU doesn't write to guest RAM
> > until VM starts, but it does on aarch64 qemu:
> > Backtrace:
> > 1  0x55f4a296dd84 in address_space_write_rom_internal () at
> > exec.c:3458
> > 2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
> > 3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
> > 4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
> > 5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
> > 6  0x55f4a2c9851d in main () at vl.c:4552
> >
> > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram
> > druing rom_reset. In ignore-shared incoming case, this rom filling
> > is not required since all the data has been stored in memory backend
> > file.
> >
> > Further more, as suggested by Peter Xu, if we do rom_reset() now with
> > these ROMs then the RAM data should be re-filled again too with the
> > migration stream coming in.
> >
> > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared
> > capability")
> > Suggested-by: Yury Kotov 
> > Suggested-by: Peter Xu 
> > Signed-off-by: Catherine Ho 
> > ---
> >  hw/core/loader.c | 9 +
> >  1 file changed, 9 insertions(+)
> >
> > diff --git a/hw/core/loader.c b/hw/core/loader.c
> > index fe5cb24122..040109464b 100644
> > --- a/hw/core/loader.c
> > +++ b/hw/core/loader.c
> > @@ -1087,6 +1087,15 @@ static void rom_reset(void *unused)
> >  {
> >  Rom *rom;
> >
> > +/*
> > + * We don't need to fill in the RAM with ROM data because we'll fill
> > + * the data in during the next incoming migration in all cases.
> Note
> > + * that some of those RAMs can actually be modified by the guest on
> ARM
> > + * so this is probably the only right thing to do here.
> > + */
> > +if (runstate_check(RUN_STATE_INMIGRATE))
> > +return;
> > +
> >  QTAILQ_FOREACH(rom, , next) {
> >  if (rom->fw_file) {
> >  continue;
> > --
> > 2.17.1
> >
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
>


Re: [Qemu-devel] [PATCH v4] migration: do not rom_reset() during incoming migration

2019-05-12 Thread Catherine Ho
Sorry for the noise, is there any more comment for this patch?
Without this patch, ignore shared capabilities can not be used on arm64

B.R.
Catherine

On Tue, 16 Apr 2019 at 10:51, Peter Xu  wrote:

> On Mon, Apr 08, 2019 at 04:42:13AM -0400, Catherine Ho wrote:
> > Commit 18269069c310 ("migration: Introduce ignore-shared capability")
> > addes ignore-shared capability to bypass the shared ramblock (e,g,
> > membackend + numa node). It does good to live migration.
> >
> > As told by Yury,this commit expectes that QEMU doesn't write to guest RAM
> > until VM starts, but it does on aarch64 qemu:
> > Backtrace:
> > 1  0x55f4a296dd84 in address_space_write_rom_internal () at
> > exec.c:3458
> > 2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
> > 3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
> > 4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
> > 5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
> > 6  0x55f4a2c9851d in main () at vl.c:4552
> >
> > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram
> > druing rom_reset. In ignore-shared incoming case, this rom filling
> > is not required since all the data has been stored in memory backend
> > file.
> >
> > Further more, as suggested by Peter Xu, if we do rom_reset() now with
> > these ROMs then the RAM data should be re-filled again too with the
> > migration stream coming in.
> >
> > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared
> > capability")
> > Suggested-by: Yury Kotov 
> > Suggested-by: Peter Xu 
> > Signed-off-by: Catherine Ho 
>
> Acked-by: Peter Xu 
>
> --
> Peter Xu
>


Re: [Qemu-devel] [PATCH v4] migration: do not rom_reset() during incoming migration

2019-04-15 Thread Catherine Ho
Ping, thanks

B.R.
Catherine

On Mon, 8 Apr 2019 at 16:43, Catherine Ho  wrote:

> Commit 18269069c310 ("migration: Introduce ignore-shared capability")
> addes ignore-shared capability to bypass the shared ramblock (e,g,
> membackend + numa node). It does good to live migration.
>
> As told by Yury,this commit expectes that QEMU doesn't write to guest RAM
> until VM starts, but it does on aarch64 qemu:
> Backtrace:
> 1  0x55f4a296dd84 in address_space_write_rom_internal () at
> exec.c:3458
> 2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
> 3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
> 4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
> 5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
> 6  0x55f4a2c9851d in main () at vl.c:4552
>
> Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram
> druing rom_reset. In ignore-shared incoming case, this rom filling
> is not required since all the data has been stored in memory backend
> file.
>
> Further more, as suggested by Peter Xu, if we do rom_reset() now with
> these ROMs then the RAM data should be re-filled again too with the
> migration stream coming in.
>
> Fixes: commit 18269069c310 ("migration: Introduce ignore-shared
> capability")
> Suggested-by: Yury Kotov 
> Suggested-by: Peter Xu 
> Signed-off-by: Catherine Ho 
> ---
>  hw/core/loader.c | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/hw/core/loader.c b/hw/core/loader.c
> index fe5cb24122..040109464b 100644
> --- a/hw/core/loader.c
> +++ b/hw/core/loader.c
> @@ -1087,6 +1087,15 @@ static void rom_reset(void *unused)
>  {
>  Rom *rom;
>
> +/*
> + * We don't need to fill in the RAM with ROM data because we'll fill
> + * the data in during the next incoming migration in all cases.  Note
> + * that some of those RAMs can actually be modified by the guest on
> ARM
> + * so this is probably the only right thing to do here.
> + */
> +if (runstate_check(RUN_STATE_INMIGRATE))
> +return;
> +
>  QTAILQ_FOREACH(rom, , next) {
>  if (rom->fw_file) {
>  continue;
> --
> 2.17.1
>
>


Re: [Qemu-devel] [PATCH] hostmem-file: warn when memory-backend-file, share=on and in incoming migration

2019-04-10 Thread Catherine Ho
Hi Dr. David

On Thu, 11 Apr 2019 at 00:57, Dr. David Alan Gilbert 
wrote:

> * Catherine Ho (catherine.h...@gmail.com) wrote:
> > Hi Dr. David
> >
> > On Wed, 10 Apr 2019 at 22:59, Dr. David Alan Gilbert <
> dgilb...@redhat.com>
> > wrote:
> >
> > > * Catherine Ho (catherine.h...@gmail.com) wrote:
> > > > Hi Igor
> > > >
> > > >
> > > > On Mon, 8 Apr 2019 at 18:35, Igor Mammedov 
> wrote:
> > > >
> > > > > On Sun,  7 Apr 2019 22:19:05 -0400
> > > > > Catherine Ho  wrote:
> > > > >
> > > > > > Currently it is not forbidden to use "-object
> > > > > memory-backend-file,share=on"
> > > > > > and together with "-incoming". But after incoming migration is
> > > finished,
> > > > > > the memory-backend-file will be definitely written if share=on.
> So
> > > the
> > > > > > memory-backend-file can only be used once, but failed in the 2nd
> time
> > > > > > incoming.
> > > > > >
> > > > > > Thus it gives a warning and the users can run the qemu if they
> really
> > > > > > want to do it.
> > > > >
> > > > > Shouldn't we add a migration blocker in such a case instead of
> warning
> > > > > and letting qemu run wild?
> > > > >
> > > >
> > > > IMO, it doesn't need to block this. With share=on and -incoming, the
> user
> > > > can
> > > > still save the device memory state into memory-backend file again if
> > > > ignore-shared
> > > > capability is on.
> > > >
> > > > If we block this, the user can't use the ignore-shared capability in
> > > > incoming
> > > > migration.
> > >
> > > -incomign with share=on is a perfectly normal thing to do - it just
> > > depends who you are sharing the file with and the lifetime of that
> > > shared file.
> > >
> > > For example; if you're just running a qemu with vhost-user then you
> > > use share=on - however wyou typically select the backend file as
> > > a new file from /dev/shm - it's not a file that you previously migrated
> > > to.
> > >
> > Thanks,
> > but using a new file from /dev/shm means kernel will start from
> > start_kernel or early? Is it different from the x-ignore-shared case?
> > If we remove the share=on in incoming migration, all the writting
> > of ram will not be flush into the memory backend file. Thus we
> > can use the base memory backend file for ever.
> > e.g.
> > 1) save the vm like a snapshot, current ram state is "kernel
> > has been started, systemd has been started"
> > 2) restore it with -incoming and *no* share=on flag
> > 3) restore it with -incoming and *no* share=on again...
> > In contrary, if we use share=on, the base backend file will
> > be written at once after 1st time incoming.
> >
> > So, IMO, no "share=on" is the proper usage of incoming migration
> > when ignore-shared is on.
> > Please correct me if sth is wrong, thanks:)
>
> OK, I see what you're trying to do - you mean for the 'snapshotting'
> case;  but that's not the only use.  Another use is for being able to
> do a very quick upgrade of the running qemu to a new qemu binary
> version; and in that case you want to be able to write to the shared
> file so that you can repeatedly do the quick migrate.
>
> Dave
>
> Ah, that quick upgrade example makes sense to me. Thanks for the
explanation.

B.R.
Catherine


Re: [Qemu-devel] [PATCH] hostmem-file: warn when memory-backend-file, share=on and in incoming migration

2019-04-10 Thread Catherine Ho
Hi Dr. David

On Wed, 10 Apr 2019 at 22:59, Dr. David Alan Gilbert 
wrote:

> * Catherine Ho (catherine.h...@gmail.com) wrote:
> > Hi Igor
> >
> >
> > On Mon, 8 Apr 2019 at 18:35, Igor Mammedov  wrote:
> >
> > > On Sun,  7 Apr 2019 22:19:05 -0400
> > > Catherine Ho  wrote:
> > >
> > > > Currently it is not forbidden to use "-object
> > > memory-backend-file,share=on"
> > > > and together with "-incoming". But after incoming migration is
> finished,
> > > > the memory-backend-file will be definitely written if share=on. So
> the
> > > > memory-backend-file can only be used once, but failed in the 2nd time
> > > > incoming.
> > > >
> > > > Thus it gives a warning and the users can run the qemu if they really
> > > > want to do it.
> > >
> > > Shouldn't we add a migration blocker in such a case instead of warning
> > > and letting qemu run wild?
> > >
> >
> > IMO, it doesn't need to block this. With share=on and -incoming, the user
> > can
> > still save the device memory state into memory-backend file again if
> > ignore-shared
> > capability is on.
> >
> > If we block this, the user can't use the ignore-shared capability in
> > incoming
> > migration.
>
> -incomign with share=on is a perfectly normal thing to do - it just
> depends who you are sharing the file with and the lifetime of that
> shared file.
>
> For example; if you're just running a qemu with vhost-user then you
> use share=on - however wyou typically select the backend file as
> a new file from /dev/shm - it's not a file that you previously migrated
> to.
>
Thanks,
but using a new file from /dev/shm means kernel will start from
start_kernel or early? Is it different from the x-ignore-shared case?
If we remove the share=on in incoming migration, all the writting
of ram will not be flush into the memory backend file. Thus we
can use the base memory backend file for ever.
e.g.
1) save the vm like a snapshot, current ram state is "kernel
has been started, systemd has been started"
2) restore it with -incoming and *no* share=on flag
3) restore it with -incoming and *no* share=on again...
In contrary, if we use share=on, the base backend file will
be written at once after 1st time incoming.

So, IMO, no "share=on" is the proper usage of incoming migration
when ignore-shared is on.
Please correct me if sth is wrong, thanks:)

B.R.
Catherine


> QEMU has no way to know about the provenance of the shared file it's
> been given, so we can't really warn people about it.
>
> Dave
>
> > B.R.
> > Catherine
> >
> > >
> > >
> > > > Signed-off-by: Catherine Ho 
> > > > ---
> > > >  backends/hostmem-file.c | 11 +++
> > > >  1 file changed, 11 insertions(+)
> > > >
> > > > diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
> > > > index 37ac6445d2..59429ee0b4 100644
> > > > --- a/backends/hostmem-file.c
> > > > +++ b/backends/hostmem-file.c
> > > > @@ -16,6 +16,7 @@
> > > >  #include "sysemu/hostmem.h"
> > > >  #include "sysemu/sysemu.h"
> > > >  #include "qom/object_interfaces.h"
> > > > +#include "migration/migration.h"
> > > >
> > > >  /* hostmem-file.c */
> > > >  /**
> > > > @@ -79,6 +80,16 @@ file_backend_memory_alloc(HostMemoryBackend
> *backend,
> > > Error **errp)
> > > >  }
> > > >  }
> > > >
> > > > +/*
> > > > + * In ignore shared case, if share=on for host memory backend
> file,
> > > > + * the ram might be written after incoming process is finished.
> Thus
> > > > + * the memory backend can't be reused for 2nd/3rd... incoming
> > > > + */
> > > > +if (backend->share && migrate_ignore_shared()
> > > > +   && runstate_check(RUN_STATE_INMIGRATE))
> > > > +warn_report("share=on for memory backend file might be "
> > > > +"conflicted with incoming in ignore shared
> > > case");
> > > > +
> > > >  backend->force_prealloc = mem_prealloc;
> > > >  name = host_memory_backend_get_name(backend);
> > > >  memory_region_init_ram_from_file(>mr, OBJECT(backend),
> > >
> > >
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
>


Re: [Qemu-devel] [PATCH] hostmem-file: warn when memory-backend-file, share=on and in incoming migration

2019-04-08 Thread Catherine Ho
Hi Igor


On Mon, 8 Apr 2019 at 18:35, Igor Mammedov  wrote:

> On Sun,  7 Apr 2019 22:19:05 -0400
> Catherine Ho  wrote:
>
> > Currently it is not forbidden to use "-object
> memory-backend-file,share=on"
> > and together with "-incoming". But after incoming migration is finished,
> > the memory-backend-file will be definitely written if share=on. So the
> > memory-backend-file can only be used once, but failed in the 2nd time
> > incoming.
> >
> > Thus it gives a warning and the users can run the qemu if they really
> > want to do it.
>
> Shouldn't we add a migration blocker in such a case instead of warning
> and letting qemu run wild?
>

IMO, it doesn't need to block this. With share=on and -incoming, the user
can
still save the device memory state into memory-backend file again if
ignore-shared
capability is on.

If we block this, the user can't use the ignore-shared capability in
incoming
migration.

B.R.
Catherine

>
>
> > Signed-off-by: Catherine Ho 
> > ---
> >  backends/hostmem-file.c | 11 +++
> >  1 file changed, 11 insertions(+)
> >
> > diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
> > index 37ac6445d2..59429ee0b4 100644
> > --- a/backends/hostmem-file.c
> > +++ b/backends/hostmem-file.c
> > @@ -16,6 +16,7 @@
> >  #include "sysemu/hostmem.h"
> >  #include "sysemu/sysemu.h"
> >  #include "qom/object_interfaces.h"
> > +#include "migration/migration.h"
> >
> >  /* hostmem-file.c */
> >  /**
> > @@ -79,6 +80,16 @@ file_backend_memory_alloc(HostMemoryBackend *backend,
> Error **errp)
> >  }
> >  }
> >
> > +/*
> > + * In ignore shared case, if share=on for host memory backend file,
> > + * the ram might be written after incoming process is finished. Thus
> > + * the memory backend can't be reused for 2nd/3rd... incoming
> > + */
> > +if (backend->share && migrate_ignore_shared()
> > +   && runstate_check(RUN_STATE_INMIGRATE))
> > +warn_report("share=on for memory backend file might be "
> > +"conflicted with incoming in ignore shared
> case");
> > +
> >  backend->force_prealloc = mem_prealloc;
> >  name = host_memory_backend_get_name(backend);
> >  memory_region_init_ram_from_file(>mr, OBJECT(backend),
>
>


[Qemu-devel] [PATCH v2] hostmem-file: warn when memory-backend-file, share=on and in incoming migration

2019-04-08 Thread Catherine Ho
Currently it is not forbidden to use "-object memory-backend-file,share=on"
and together with "-incoming". But after incoming migration is finished,
the memory-backend-file will be definitely written if share=on. So the
memory-backend-file can only be used once, but failed in the 2nd time
incoming.

Thus it gives a warning and the users can run the qemu if they really
want to do it.

Signed-off-by: Catherine Ho 
---
 backends/hostmem-file.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 37ac6445d2..ce03dc0a18 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -16,6 +16,7 @@
 #include "sysemu/hostmem.h"
 #include "sysemu/sysemu.h"
 #include "qom/object_interfaces.h"
+#include "migration/migration.h"
 
 /* hostmem-file.c */
 /**
@@ -79,6 +80,17 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error 
**errp)
 }
 }
 
+/*
+ * In ignore shared incoming migration, if share=on for host memory
+ * backend file, the ram might be modified after incoming process.
+ * The user should know this potential risk.
+ */
+if (backend->share && migrate_ignore_shared()
+   && runstate_check(RUN_STATE_INMIGRATE))
+warn_report("NOTE: Please make sure the data on the shared memory "
+"backend file and the data from the incoming migration"
+" stream contains matching contents, otherwise...");
+
 backend->force_prealloc = mem_prealloc;
 name = host_memory_backend_get_name(backend);
 memory_region_init_ram_from_file(>mr, OBJECT(backend),
-- 
2.17.1




[Qemu-devel] [PATCH v4] migration: do not rom_reset() during incoming migration

2019-04-08 Thread Catherine Ho
Commit 18269069c310 ("migration: Introduce ignore-shared capability")
addes ignore-shared capability to bypass the shared ramblock (e,g,
membackend + numa node). It does good to live migration.

As told by Yury,this commit expectes that QEMU doesn't write to guest RAM
until VM starts, but it does on aarch64 qemu:
Backtrace:
1  0x55f4a296dd84 in address_space_write_rom_internal () at
exec.c:3458
2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
6  0x55f4a2c9851d in main () at vl.c:4552

Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram
druing rom_reset. In ignore-shared incoming case, this rom filling
is not required since all the data has been stored in memory backend
file.

Further more, as suggested by Peter Xu, if we do rom_reset() now with
these ROMs then the RAM data should be re-filled again too with the
migration stream coming in.

Fixes: commit 18269069c310 ("migration: Introduce ignore-shared
capability")
Suggested-by: Yury Kotov 
Suggested-by: Peter Xu 
Signed-off-by: Catherine Ho 
---
 hw/core/loader.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/core/loader.c b/hw/core/loader.c
index fe5cb24122..040109464b 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1087,6 +1087,15 @@ static void rom_reset(void *unused)
 {
 Rom *rom;
 
+/*
+ * We don't need to fill in the RAM with ROM data because we'll fill
+ * the data in during the next incoming migration in all cases.  Note
+ * that some of those RAMs can actually be modified by the guest on ARM
+ * so this is probably the only right thing to do here.
+ */
+if (runstate_check(RUN_STATE_INMIGRATE))
+return;
+
 QTAILQ_FOREACH(rom, , next) {
 if (rom->fw_file) {
 continue;
-- 
2.17.1




Re: [Qemu-devel] [PATCH] hostmem-file: warn when memory-backend-file, share=on and in incoming migration

2019-04-08 Thread Catherine Ho
Hi Peter Xu

On Mon, 8 Apr 2019 at 11:25, Peter Xu  wrote:

> On Sun, Apr 07, 2019 at 10:19:05PM -0400, Catherine Ho wrote:
> > Currently it is not forbidden to use "-object
> memory-backend-file,share=on"
> > and together with "-incoming". But after incoming migration is finished,
> > the memory-backend-file will be definitely written if share=on. So the
> > memory-backend-file can only be used once, but failed in the 2nd time
> > incoming.
> >
> > Thus it gives a warning and the users can run the qemu if they really
> > want to do it.
> >
> > Signed-off-by: Catherine Ho 
> > ---
> >  backends/hostmem-file.c | 11 +++
> >  1 file changed, 11 insertions(+)
> >
> > diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
> > index 37ac6445d2..59429ee0b4 100644
> > --- a/backends/hostmem-file.c
> > +++ b/backends/hostmem-file.c
> > @@ -16,6 +16,7 @@
> >  #include "sysemu/hostmem.h"
> >  #include "sysemu/sysemu.h"
> >  #include "qom/object_interfaces.h"
> > +#include "migration/migration.h"
> >
> >  /* hostmem-file.c */
> >  /**
> > @@ -79,6 +80,16 @@ file_backend_memory_alloc(HostMemoryBackend *backend,
> Error **errp)
> >  }
> >  }
> >
> > +/*
> > + * In ignore shared case, if share=on for host memory backend file,
> > + * the ram might be written after incoming process is finished. Thus
> > + * the memory backend can't be reused for 2nd/3rd... incoming
> > + */
> > +if (backend->share && migrate_ignore_shared()
> > +   && runstate_check(RUN_STATE_INMIGRATE))
> > +warn_report("share=on for memory backend file might be "
> > +"conflicted with incoming in ignore shared
> case");
>
> I feel like this message wasn't really clear to me...  you want to
> warn people these data might not match with each other, right?  How
> about simply state it:
>
>   NOTE: Please make sure the data on the shared memory backend file
>   and the data from the incoming migration stream contains matching
>   contents, otherwise...
>

Sorry for my vague expression.
The background is [1]
I happened to use "-object memory-backend-file,share=on" and together with
"-incoming".
It worked fine in 1st incoming migration but failed in 2nd, 3rd incoming
migration.
Because qemu uses qemu_ram_mmap(..., MAP_SHARED,...) when share=on, the
memory-backend-file will be written after 1st incoming migration.
Finally I realized that this was caused by "share=on" flag, and after I
removed it,
the memory-backend-file will not be changed any more.

So do you think it will be better that qemu gives the user a clear warning
that
incoming migration will change the data in  memory-backend-file  with
"share=on" ?

[1] http://lists.nongnu.org/archive/html/qemu-devel/2019-04/msg00350.html


> Regards,
>
> --
> Peter Xu
>


[Qemu-devel] [PATCH] hostmem-file: warn when memory-backend-file, share=on and in incoming migration

2019-04-07 Thread Catherine Ho
Currently it is not forbidden to use "-object memory-backend-file,share=on"
and together with "-incoming". But after incoming migration is finished,
the memory-backend-file will be definitely written if share=on. So the
memory-backend-file can only be used once, but failed in the 2nd time
incoming.

Thus it gives a warning and the users can run the qemu if they really
want to do it.

Signed-off-by: Catherine Ho 
---
 backends/hostmem-file.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 37ac6445d2..59429ee0b4 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -16,6 +16,7 @@
 #include "sysemu/hostmem.h"
 #include "sysemu/sysemu.h"
 #include "qom/object_interfaces.h"
+#include "migration/migration.h"
 
 /* hostmem-file.c */
 /**
@@ -79,6 +80,16 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error 
**errp)
 }
 }
 
+/*
+ * In ignore shared case, if share=on for host memory backend file,
+ * the ram might be written after incoming process is finished. Thus
+ * the memory backend can't be reused for 2nd/3rd... incoming
+ */
+if (backend->share && migrate_ignore_shared()
+   && runstate_check(RUN_STATE_INMIGRATE))
+warn_report("share=on for memory backend file might be "
+"conflicted with incoming in ignore shared case");
+
 backend->force_prealloc = mem_prealloc;
 name = host_memory_backend_get_name(backend);
 memory_region_init_ram_from_file(>mr, OBJECT(backend),
-- 
2.17.1




[Qemu-devel] [PATCH v3] migration: do not rom_reset() during incoming migration

2019-04-07 Thread Catherine Ho
Commit 18269069c310 ("migration: Introduce ignore-shared capability")
addes ignore-shared capability to bypass the shared ramblock (e,g,
membackend + numa node). It does good to live migration.

As told by Yury,this commit expectes that QEMU doesn't write to guest RAM
until VM starts, but it does on aarch64 qemu:
Backtrace:
1  0x55f4a296dd84 in address_space_write_rom_internal () at
exec.c:3458
2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
6  0x55f4a2c9851d in main () at vl.c:4552

Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram
druing rom_reset. In ignore-shared incoming case, this rom filling
is not required since all the data has been stored in memory backend
file.

Further more, as suggested by Peter Xu, if we do rom_reset() now with
these ROMs then the RAM data should be re-filled again too with the
migration stream coming in.

Fixes: commit 18269069c310 ("migration: Introduce ignore-shared
capability")
Suggested-by: Yury Kotov 
Suggested-by: Peter Xu 
Signed-off-by: Catherine Ho 
---
 hw/core/loader.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/core/loader.c b/hw/core/loader.c
index fe5cb24122..946bb8ff00 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1087,6 +1087,13 @@ static void rom_reset(void *unused)
 {
 Rom *rom;
 
+/*
+ * If we do rom_reset() now with these ROMs then the RAM data should be
+ * re-filled again too with the migration stream coming in.
+ */
+if (runstate_check(RUN_STATE_INMIGRATE))
+return;
+
 QTAILQ_FOREACH(rom, , next) {
 if (rom->fw_file) {
 continue;
-- 
2.17.1




Re: [Qemu-devel] [PATCH v2] migration: avoid filling ignore-shared ramblock when in incoming migration

2019-04-04 Thread Catherine Ho
Hi Peter Xu

On Thu, 4 Apr 2019 at 12:25, Peter Xu  wrote:

> On Wed, Apr 03, 2019 at 11:21:47PM +0800, Catherine Ho wrote:
> > Hi Peter Xu
> >
> > On Wed, 3 Apr 2019 at 10:25, Peter Xu  wrote:
> >
> > > On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote:
> > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability")
> > > > addes ignore-shared capability to bypass the shared ramblock (e,g,
> > > > membackend + numa node). It does good to live migration.
> > > >
> > > > This commit expectes that QEMU doesn't write to guest RAM until
> > > > VM starts, but it does on aarch64 qemu:
> > > > Backtrace:
> > > > 1  0x55f4a296dd84 in address_space_write_rom_internal () at
> > > exec.c:3458
> > > > 2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
> > > > 3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
> > > > 4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
> > > > 5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
> > > > 6  0x55f4a2c9851d in main () at vl.c:4552
> > > >
> > > > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into
> ram
> > > > during rom_reset. In ignore-shared incoming case, this rom filling
> > > > is not required since all the data has been stored in memory backend
> > > file.
> > > >
> > > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared
> > > capability")
> > > > Signed-off-by: Catherine Ho 
> > > > Suggested-by: Yury Kotov 
> > >
> > > (note: IIUC normally you should have your signed-off to be the last
> > >  line before the suggested-by :)
> > >
> > > About the patch content, I have had a question on whether we should
> > > need to check ignore-shared at all... That question lies in:
> > >
> > > https://patchwork.kernel.org/patch/10859889/#22546487
> > >
> > > And if my understanding was correct above, IMHO the patch could be as
> > > simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1]
> > > below.
> > >
> > >
> > Thanks, but I thought this method would break the x86 rom_reset logic
> during
> > RUN_STATE_INMIGRATE.
> > Please see the debugging patch and log lines below:
> > diff --git a/hw/core/loader.c b/hw/core/loader.c
> > index fe5cb24122..b0c871af26 100644
> > --- a/hw/core/loader.c
> > +++ b/hw/core/loader.c
> > @@ -1086,8 +1086,9 @@ int rom_add_option(const char *file, int32_t
> > bootindex)
> >  static void rom_reset(void *unused)
> >  {
> >  Rom *rom;
> > -
> >  QTAILQ_FOREACH(rom, , next) {
> > +if (runstate_check(RUN_STATE_INMIGRATE))
> > +   printf("rom name=%s\n",rom->name);
> >  if (rom->fw_file) {
> >  continue;
> >  }
> >
> > rom name=kvmvapic.bin
> > rom name=linuxboot_dma.bin
> > rom name=bios-256k.bin
> > rom name=etc/acpi/tables
> > rom name=etc/table-loader
> > rom name=etc/acpi/rsdp
>
> Hi, Catherine,
>
> I only see that rom names were dumped.  Could you help explain what is
> broken?  Thanks,
>
> Sorry, I have another concern here. What if there is no memory_backend
file?
If there is no memory backend file (i.e. without -object
memory-backend-file,id=dimm1,size=512M,mem-path=/path/memory)

Should the rom blobs still be written into ram in such case?

B.R.
Catherine

> --
> Peter Xu
>


Re: [Qemu-devel] [PATCH v2] migration: avoid filling ignore-shared ramblock when in incoming migration

2019-04-04 Thread Catherine Ho
Hi Peter Xu

On Thu, 4 Apr 2019 at 12:25, Peter Xu  wrote:

> On Wed, Apr 03, 2019 at 11:21:47PM +0800, Catherine Ho wrote:
> > Hi Peter Xu
> >
> > On Wed, 3 Apr 2019 at 10:25, Peter Xu  wrote:
> >
> > > On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote:
> > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability")
> > > > addes ignore-shared capability to bypass the shared ramblock (e,g,
> > > > membackend + numa node). It does good to live migration.
> > > >
> > > > This commit expectes that QEMU doesn't write to guest RAM until
> > > > VM starts, but it does on aarch64 qemu:
> > > > Backtrace:
> > > > 1  0x55f4a296dd84 in address_space_write_rom_internal () at
> > > exec.c:3458
> > > > 2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
> > > > 3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
> > > > 4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
> > > > 5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
> > > > 6  0x55f4a2c9851d in main () at vl.c:4552
> > > >
> > > > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into
> ram
> > > > during rom_reset. In ignore-shared incoming case, this rom filling
> > > > is not required since all the data has been stored in memory backend
> > > file.
> > > >
> > > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared
> > > capability")
> > > > Signed-off-by: Catherine Ho 
> > > > Suggested-by: Yury Kotov 
> > >
> > > (note: IIUC normally you should have your signed-off to be the last
> > >  line before the suggested-by :)
> > >
> > > About the patch content, I have had a question on whether we should
> > > need to check ignore-shared at all... That question lies in:
> > >
> > > https://patchwork.kernel.org/patch/10859889/#22546487
> > >
> > > And if my understanding was correct above, IMHO the patch could be as
> > > simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1]
> > > below.
> > >
> > >
> > Thanks, but I thought this method would break the x86 rom_reset logic
> during
> > RUN_STATE_INMIGRATE.
> > Please see the debugging patch and log lines below:
> > diff --git a/hw/core/loader.c b/hw/core/loader.c
> > index fe5cb24122..b0c871af26 100644
> > --- a/hw/core/loader.c
> > +++ b/hw/core/loader.c
> > @@ -1086,8 +1086,9 @@ int rom_add_option(const char *file, int32_t
> > bootindex)
> >  static void rom_reset(void *unused)
> >  {
> >  Rom *rom;
> > -
> >  QTAILQ_FOREACH(rom, , next) {
> > +if (runstate_check(RUN_STATE_INMIGRATE))
> > +   printf("rom name=%s\n",rom->name);
> >  if (rom->fw_file) {
> >  continue;
> >  }
> >
> > rom name=kvmvapic.bin
> > rom name=linuxboot_dma.bin
> > rom name=bios-256k.bin
> > rom name=etc/acpi/tables
> > rom name=etc/table-loader
> > rom name=etc/acpi/rsdp
>
> Hi, Catherine,
>
> I only see that rom names were dumped.  Could you help explain what is
> broken?  Thanks,
>
Thanks for the suggestion. I haven't seen any obvious errors on x86 with
this change.
I merely consider not to change the old code logic too much.
Ok, I will change it as you suggested if no more comments.

B.R.
Catherine

>
> --
> Peter Xu
>


Re: [Qemu-devel] [PATCH v2] migration: avoid filling ignore-shared ramblock when in incoming migration

2019-04-03 Thread Catherine Ho
Hi Peter Xu

On Wed, 3 Apr 2019 at 10:25, Peter Xu  wrote:

> On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote:
> > Commit 18269069c310 ("migration: Introduce ignore-shared capability")
> > addes ignore-shared capability to bypass the shared ramblock (e,g,
> > membackend + numa node). It does good to live migration.
> >
> > This commit expectes that QEMU doesn't write to guest RAM until
> > VM starts, but it does on aarch64 qemu:
> > Backtrace:
> > 1  0x55f4a296dd84 in address_space_write_rom_internal () at
> exec.c:3458
> > 2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
> > 3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
> > 4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
> > 5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
> > 6  0x55f4a2c9851d in main () at vl.c:4552
> >
> > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram
> > during rom_reset. In ignore-shared incoming case, this rom filling
> > is not required since all the data has been stored in memory backend
> file.
> >
> > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared
> capability")
> > Signed-off-by: Catherine Ho 
> > Suggested-by: Yury Kotov 
>
> (note: IIUC normally you should have your signed-off to be the last
>  line before the suggested-by :)
>
> About the patch content, I have had a question on whether we should
> need to check ignore-shared at all... That question lies in:
>
> https://patchwork.kernel.org/patch/10859889/#22546487
>
> And if my understanding was correct above, IMHO the patch could be as
> simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1]
> below.
>
>
Thanks, but I thought this method would break the x86 rom_reset logic during
RUN_STATE_INMIGRATE.
Please see the debugging patch and log lines below:
diff --git a/hw/core/loader.c b/hw/core/loader.c
index fe5cb24122..b0c871af26 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1086,8 +1086,9 @@ int rom_add_option(const char *file, int32_t
bootindex)
 static void rom_reset(void *unused)
 {
 Rom *rom;
-
 QTAILQ_FOREACH(rom, , next) {
+if (runstate_check(RUN_STATE_INMIGRATE))
+   printf("rom name=%s\n",rom->name);
 if (rom->fw_file) {
 continue;
 }

rom name=kvmvapic.bin
rom name=linuxboot_dma.bin
rom name=bios-256k.bin
rom name=etc/acpi/tables
rom name=etc/table-loader
rom name=etc/acpi/rsdp

B.R.
Catherine

> Thanks,
>
> > ---
> >  hw/core/loader.c  | 15 +++
> >  include/exec/cpu-common.h |  1 +
> >  migration/ram.c   |  2 +-
> >  3 files changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git a/hw/core/loader.c b/hw/core/loader.c
> > index fe5cb24122..861a03335b 100644
> > --- a/hw/core/loader.c
> > +++ b/hw/core/loader.c
> > @@ -53,6 +53,7 @@
> >  #include "hw/nvram/fw_cfg.h"
> >  #include "exec/memory.h"
> >  #include "exec/address-spaces.h"
> > +#include "exec/cpu-common.h"
> >  #include "hw/boards.h"
> >  #include "qemu/cutils.h"
> >
> > @@ -1086,6 +1087,9 @@ int rom_add_option(const char *file, int32_t
> bootindex)
> >  static void rom_reset(void *unused)
> >  {
> >  Rom *rom;
> > +MemoryRegion *mr;
> > +hwaddr hw_addr;
> > +hwaddr l;
>
> [1]
>
> >
> >  QTAILQ_FOREACH(rom, , next) {
> >  if (rom->fw_file) {
> > @@ -1094,6 +1098,17 @@ static void rom_reset(void *unused)
> >  if (rom->data == NULL) {
> >  continue;
> >  }
> > +
> > +/* bypass the rom blob in ignore-shared migration case*/
> > +if (runstate_check(RUN_STATE_INMIGRATE)) {
> > +rcu_read_lock();
> > +mr = address_space_translate(rom->as, rom->addr, _addr,
> ,
> > + true, MEMTXATTRS_UNSPECIFIED);
> > +rcu_read_unlock();
> > +if (mr->ram_block != NULL &&
> ramblock_is_ignored(mr->ram_block))
> > +continue;
> > +}
> > +
> >  if (rom->mr) {
> >  void *host = memory_region_get_ram_ptr(rom->mr);
> >  memcpy(host, rom->data, rom->datasize);
> > diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
> > index cef8b88a2a..c80b7248a6 100644
> > --- a/include/exec/cpu-common.h
> > +++ b/include/exec/cpu-common.h
>

[Qemu-devel] [PATCH v2] migration: avoid filling ignore-shared ramblock when in incoming migration

2019-04-02 Thread Catherine Ho
Commit 18269069c310 ("migration: Introduce ignore-shared capability")
addes ignore-shared capability to bypass the shared ramblock (e,g,
membackend + numa node). It does good to live migration.

This commit expectes that QEMU doesn't write to guest RAM until
VM starts, but it does on aarch64 qemu:
Backtrace:
1  0x55f4a296dd84 in address_space_write_rom_internal () at exec.c:3458
2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
6  0x55f4a2c9851d in main () at vl.c:4552

Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram
during rom_reset. In ignore-shared incoming case, this rom filling
is not required since all the data has been stored in memory backend file.

Fixes: commit 18269069c310 ("migration: Introduce ignore-shared capability")
Signed-off-by: Catherine Ho 
Suggested-by: Yury Kotov 
---
 hw/core/loader.c  | 15 +++
 include/exec/cpu-common.h |  1 +
 migration/ram.c   |  2 +-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/hw/core/loader.c b/hw/core/loader.c
index fe5cb24122..861a03335b 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -53,6 +53,7 @@
 #include "hw/nvram/fw_cfg.h"
 #include "exec/memory.h"
 #include "exec/address-spaces.h"
+#include "exec/cpu-common.h"
 #include "hw/boards.h"
 #include "qemu/cutils.h"
 
@@ -1086,6 +1087,9 @@ int rom_add_option(const char *file, int32_t bootindex)
 static void rom_reset(void *unused)
 {
 Rom *rom;
+MemoryRegion *mr;
+hwaddr hw_addr;
+hwaddr l;
 
 QTAILQ_FOREACH(rom, , next) {
 if (rom->fw_file) {
@@ -1094,6 +1098,17 @@ static void rom_reset(void *unused)
 if (rom->data == NULL) {
 continue;
 }
+
+/* bypass the rom blob in ignore-shared migration case*/
+if (runstate_check(RUN_STATE_INMIGRATE)) {
+rcu_read_lock();
+mr = address_space_translate(rom->as, rom->addr, _addr, ,
+ true, MEMTXATTRS_UNSPECIFIED);
+rcu_read_unlock();
+if (mr->ram_block != NULL && ramblock_is_ignored(mr->ram_block))
+continue;
+}
+
 if (rom->mr) {
 void *host = memory_region_get_ram_ptr(rom->mr);
 memcpy(host, rom->data, rom->datasize);
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index cef8b88a2a..c80b7248a6 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -76,6 +76,7 @@ void *qemu_ram_get_host_addr(RAMBlock *rb);
 ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
 ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
 bool qemu_ram_is_shared(RAMBlock *rb);
+bool ramblock_is_ignored(RAMBlock *block);
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
 bool qemu_ram_is_migratable(RAMBlock *rb);
diff --git a/migration/ram.c b/migration/ram.c
index 35bd6213e9..d6de9d335d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -159,7 +159,7 @@ out:
 return ret;
 }
 
-static bool ramblock_is_ignored(RAMBlock *block)
+bool ramblock_is_ignored(RAMBlock *block)
 {
 return !qemu_ram_is_migratable(block) ||
(migrate_ignore_shared() && qemu_ram_is_shared(block));
-- 
2.17.1




Re: [Qemu-devel] [PATCH] migration: avoid copying ignore-shared ramblock when in incoming migration

2019-04-02 Thread Catherine Ho
On Tue, 2 Apr 2019 at 22:17, Catherine Ho  wrote:

>
>
> On Tue, 2 Apr 2019 at 20:37, Peter Xu  wrote:
>
>> On Tue, Apr 02, 2019 at 05:06:15PM +0800, Catherine Ho wrote:
>> > On Tue, 2 Apr 2019 at 15:58, Peter Xu  wrote:
>> >
>> > > On Tue, Apr 02, 2019 at 03:47:16PM +0800, Catherine Ho wrote:
>> > > > Hi Peter Maydell
>> > > >
>> > > > On Tue, 2 Apr 2019 at 11:05, Peter Maydell <
>> peter.mayd...@linaro.org>
>> > > wrote:
>> > > >
>> > > > > On Tue, 2 Apr 2019 at 09:57, Catherine Ho <
>> catherine.h...@gmail.com>
>> > > > > wrote:
>> > > > > > The root cause is the used idx is moved forward after 1st time
>> > > incoming,
>> > > > > and in 2nd time incoming,
>> > > > > > the last_avail_idx will be incorrectly restored from the saved
>> device
>> > > > > state file(not in the ram).
>> > > > > >
>> > > > > > I watched this even on x86 for a virtio-scsi disk
>> > > > > >
>> > > > > > Any ideas for supporting 2nd time, 3rd time... incoming
>> restoring?
>> > > > >
>> > > > > Does the destination end go through reset between the 1st and 2nd
>> > > > >
>> > > > seems not, please see my step below
>> > > >
>> > > > > incoming attempts? I'm not a migration expert, but I thought that
>> > > > > devices were allowed to assume that their state is "state of the
>> > > > > device following QEMU reset" before the start of an incoming
>> > > > > migration attempt.
>> > > > >
>> > > >
>> > > > Here  is my step:
>> > > > 1. start guest normal by qemu with shared memory-backend file
>> > > > 2. stop the vm. save the device state to another file via monitor
>> migrate
>> > > > "exec: cat>..."
>> > > > 3. quit the vm
>> > > > 4. retore the vm by qemu -incoming "exec:cat ..."
>> > > > 5. continue the vm via monito, the 1st incoming works fine
>> > > > 6. quit the vm
>> > > > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time
>> > > > 8. continue   -> error happened
>> > > > Actually, this can be fixed by forcely restore the idx by
>> > > > virtio_queue_restore_last_avail_idx()
>> > > > But I am sure whether it is reasonable.
>> > >
>> > > Yeah I really suspect its validity.
>> > >
>> > > IMHO normal migration streams keep the device state and RAM data
>> > > together in the dumped file, so they always match.
>> > >
>> > > In your shared case, the device states are in the dumped file however
>> > > the RAM data is located somewhere else.  After you quit the VM from
>> > > the 1st incoming migration the RAM is new (because that's a shared
>> > > memory file) and the device data is still old.  They do not match
>> > > already, then I'd say you can't migrate with that any more.
>> > >
>> > > If you want to do that, you'd better take snapshot of the RAM backend
>> > > file if your filesystem supports (or even simpler, to back it up
>> > > before hand) before you start any incoming migration.  Then with the
>> > > dumped file (which contains the device states) and that snapshot file
>> > > (which contains the exact RAM data that matches the device states)
>> > > you'll alway be able to migrate for as many times as you want.
>> > >
>> >
>> > Understood, thanks Peter Xu
>> > Is there any feasible way to indicate the snapshot of the RAM backend
>> file
>> > is
>> > matched with the device data?
>> > >VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688
>> > >Failed to load virtio-scsi:virtio
>> >
>> > Because I thought reporting above error is not so friendly. Could we
>> add a
>> > version id in both RAM backend file and device date file?
>>
>> It would be non-trivial I'd say - AFAIK we don't have an existing way
>> to tag the memory-backend-file content (IIUC that's what you use).
>>
>> And since you mentioned about versioning of these states, I just
>> remembered that even with this you may not be able to get a complete
>> matched state of the VM, 

Re: [Qemu-devel] [PATCH] migration: avoid copying ignore-shared ramblock when in incoming migration

2019-04-02 Thread Catherine Ho
On Tue, 2 Apr 2019 at 20:37, Peter Xu  wrote:

> On Tue, Apr 02, 2019 at 05:06:15PM +0800, Catherine Ho wrote:
> > On Tue, 2 Apr 2019 at 15:58, Peter Xu  wrote:
> >
> > > On Tue, Apr 02, 2019 at 03:47:16PM +0800, Catherine Ho wrote:
> > > > Hi Peter Maydell
> > > >
> > > > On Tue, 2 Apr 2019 at 11:05, Peter Maydell  >
> > > wrote:
> > > >
> > > > > On Tue, 2 Apr 2019 at 09:57, Catherine Ho <
> catherine.h...@gmail.com>
> > > > > wrote:
> > > > > > The root cause is the used idx is moved forward after 1st time
> > > incoming,
> > > > > and in 2nd time incoming,
> > > > > > the last_avail_idx will be incorrectly restored from the saved
> device
> > > > > state file(not in the ram).
> > > > > >
> > > > > > I watched this even on x86 for a virtio-scsi disk
> > > > > >
> > > > > > Any ideas for supporting 2nd time, 3rd time... incoming
> restoring?
> > > > >
> > > > > Does the destination end go through reset between the 1st and 2nd
> > > > >
> > > > seems not, please see my step below
> > > >
> > > > > incoming attempts? I'm not a migration expert, but I thought that
> > > > > devices were allowed to assume that their state is "state of the
> > > > > device following QEMU reset" before the start of an incoming
> > > > > migration attempt.
> > > > >
> > > >
> > > > Here  is my step:
> > > > 1. start guest normal by qemu with shared memory-backend file
> > > > 2. stop the vm. save the device state to another file via monitor
> migrate
> > > > "exec: cat>..."
> > > > 3. quit the vm
> > > > 4. retore the vm by qemu -incoming "exec:cat ..."
> > > > 5. continue the vm via monito, the 1st incoming works fine
> > > > 6. quit the vm
> > > > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time
> > > > 8. continue   -> error happened
> > > > Actually, this can be fixed by forcely restore the idx by
> > > > virtio_queue_restore_last_avail_idx()
> > > > But I am sure whether it is reasonable.
> > >
> > > Yeah I really suspect its validity.
> > >
> > > IMHO normal migration streams keep the device state and RAM data
> > > together in the dumped file, so they always match.
> > >
> > > In your shared case, the device states are in the dumped file however
> > > the RAM data is located somewhere else.  After you quit the VM from
> > > the 1st incoming migration the RAM is new (because that's a shared
> > > memory file) and the device data is still old.  They do not match
> > > already, then I'd say you can't migrate with that any more.
> > >
> > > If you want to do that, you'd better take snapshot of the RAM backend
> > > file if your filesystem supports (or even simpler, to back it up
> > > before hand) before you start any incoming migration.  Then with the
> > > dumped file (which contains the device states) and that snapshot file
> > > (which contains the exact RAM data that matches the device states)
> > > you'll alway be able to migrate for as many times as you want.
> > >
> >
> > Understood, thanks Peter Xu
> > Is there any feasible way to indicate the snapshot of the RAM backend
> file
> > is
> > matched with the device data?
> > >VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688
> > >Failed to load virtio-scsi:virtio
> >
> > Because I thought reporting above error is not so friendly. Could we add
> a
> > version id in both RAM backend file and device date file?
>
> It would be non-trivial I'd say - AFAIK we don't have an existing way
> to tag the memory-backend-file content (IIUC that's what you use).
>
> And since you mentioned about versioning of these states, I just
> remembered that even with this you may not be able to get a complete
> matched state of the VM, because AFAICT actually besides RAM state &
> device state, you probably also need to consider the disk state as
> well.  After you started the VM of the 1st incoming, there could be
> data flushed to the VM backend disk and then that state is changed as
> well.  So here even if you snapshot the RAM file you'll still lose the
> disk state IIUC so it could still be broken.  In other words, to make
> a migration/snapshot to work you'll need to make all these three
> states to match.
>

Yes, thanks

>
> Before we discuss further on the topic... could you share me with your
> requirement first?  I started to get a bit confused now since when I
> thought about shared mem I was thinking about migrating within the
> same host to e.g. upgrade the hypervisor but that obviously does not
> need you to do incoming migration for multiple times.  Then what do
> you finally want to achieve?
>
Actually, I am investigating the ignore-shared capability case support on
arm64. This feature is used in Kata containers project as "vm template"
 The rom reset failure is the first bug.
Ok, now I can confirm that doing incoming migration for multiple times is
supported. Thanks for the detailed explanation :)
B.R.
Catherine


Re: [Qemu-devel] [PATCH] migration: avoid copying ignore-shared ramblock when in incoming migration

2019-04-02 Thread Catherine Ho
On Tue, 2 Apr 2019 at 15:58, Peter Xu  wrote:

> On Tue, Apr 02, 2019 at 03:47:16PM +0800, Catherine Ho wrote:
> > Hi Peter Maydell
> >
> > On Tue, 2 Apr 2019 at 11:05, Peter Maydell 
> wrote:
> >
> > > On Tue, 2 Apr 2019 at 09:57, Catherine Ho 
> > > wrote:
> > > > The root cause is the used idx is moved forward after 1st time
> incoming,
> > > and in 2nd time incoming,
> > > > the last_avail_idx will be incorrectly restored from the saved device
> > > state file(not in the ram).
> > > >
> > > > I watched this even on x86 for a virtio-scsi disk
> > > >
> > > > Any ideas for supporting 2nd time, 3rd time... incoming restoring?
> > >
> > > Does the destination end go through reset between the 1st and 2nd
> > >
> > seems not, please see my step below
> >
> > > incoming attempts? I'm not a migration expert, but I thought that
> > > devices were allowed to assume that their state is "state of the
> > > device following QEMU reset" before the start of an incoming
> > > migration attempt.
> > >
> >
> > Here  is my step:
> > 1. start guest normal by qemu with shared memory-backend file
> > 2. stop the vm. save the device state to another file via monitor migrate
> > "exec: cat>..."
> > 3. quit the vm
> > 4. retore the vm by qemu -incoming "exec:cat ..."
> > 5. continue the vm via monito, the 1st incoming works fine
> > 6. quit the vm
> > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time
> > 8. continue   -> error happened
> > Actually, this can be fixed by forcely restore the idx by
> > virtio_queue_restore_last_avail_idx()
> > But I am sure whether it is reasonable.
>
> Yeah I really suspect its validity.
>
> IMHO normal migration streams keep the device state and RAM data
> together in the dumped file, so they always match.
>
> In your shared case, the device states are in the dumped file however
> the RAM data is located somewhere else.  After you quit the VM from
> the 1st incoming migration the RAM is new (because that's a shared
> memory file) and the device data is still old.  They do not match
> already, then I'd say you can't migrate with that any more.
>
> If you want to do that, you'd better take snapshot of the RAM backend
> file if your filesystem supports (or even simpler, to back it up
> before hand) before you start any incoming migration.  Then with the
> dumped file (which contains the device states) and that snapshot file
> (which contains the exact RAM data that matches the device states)
> you'll alway be able to migrate for as many times as you want.
>

Understood, thanks Peter Xu
Is there any feasible way to indicate the snapshot of the RAM backend file
is
matched with the device data?
>VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688
>Failed to load virtio-scsi:virtio

Because I thought reporting above error is not so friendly. Could we add a
version id in both RAM backend file and device date file?

B.R.
Catherine
>
>
> Regards,
>
> --
> Peter Xu
>


Re: [Qemu-devel] [PATCH] migration: avoid copying ignore-shared ramblock when in incoming migration

2019-04-02 Thread Catherine Ho
Hi Peter Maydell

On Tue, 2 Apr 2019 at 11:05, Peter Maydell  wrote:

> On Tue, 2 Apr 2019 at 09:57, Catherine Ho 
> wrote:
> > The root cause is the used idx is moved forward after 1st time incoming,
> and in 2nd time incoming,
> > the last_avail_idx will be incorrectly restored from the saved device
> state file(not in the ram).
> >
> > I watched this even on x86 for a virtio-scsi disk
> >
> > Any ideas for supporting 2nd time, 3rd time... incoming restoring?
>
> Does the destination end go through reset between the 1st and 2nd
>
seems not, please see my step below

> incoming attempts? I'm not a migration expert, but I thought that
> devices were allowed to assume that their state is "state of the
> device following QEMU reset" before the start of an incoming
> migration attempt.
>

Here  is my step:
1. start guest normal by qemu with shared memory-backend file
2. stop the vm. save the device state to another file via monitor migrate
"exec: cat>..."
3. quit the vm
4. retore the vm by qemu -incoming "exec:cat ..."
5. continue the vm via monito, the 1st incoming works fine
6. quit the vm
7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time
8. continue   -> error happened
Actually, this can be fixed by forcely restore the idx by
virtio_queue_restore_last_avail_idx()
But I am sure whether it is reasonable.

B.R.

>
> thanks
> -- PMM
>


Re: [Qemu-devel] [PATCH] migration: avoid copying ignore-shared ramblock when in incoming migration

2019-04-02 Thread Catherine Ho
On Tue, 2 Apr 2019 at 15:47, Catherine Ho  wrote:

> Hi Peter Maydell
>
> On Tue, 2 Apr 2019 at 11:05, Peter Maydell 
> wrote:
>
>> On Tue, 2 Apr 2019 at 09:57, Catherine Ho 
>> wrote:
>> > The root cause is the used idx is moved forward after 1st time
>> incoming, and in 2nd time incoming,
>> > the last_avail_idx will be incorrectly restored from the saved device
>> state file(not in the ram).
>> >
>> > I watched this even on x86 for a virtio-scsi disk
>> >
>> > Any ideas for supporting 2nd time, 3rd time... incoming restoring?
>>
>> Does the destination end go through reset between the 1st and 2nd
>>
> seems not, please see my step below
>
>> incoming attempts? I'm not a migration expert, but I thought that
>> devices were allowed to assume that their state is "state of the
>> device following QEMU reset" before the start of an incoming
>> migration attempt.
>>
>
> Here  is my step:
> 1. start guest normal by qemu with shared memory-backend file
> 2. stop the vm. save the device state to another file via monitor migrate
> "exec: cat>..."
> 3. quit the vm
>
via "quit" command of monitor

> 4. retore the vm by qemu -incoming "exec:cat ..."
> 5. continue the vm via monito, the 1st incoming works fine
> 6. quit the vm
> 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time
> 8. continue   -> error happened
> Actually, this can be fixed by forcely restore the idx by
> virtio_queue_restore_last_avail_idx()
> But I am sure whether it is reasonable.
>
s/sure/not sure

>
> B.R.
>
>>
>> thanks
>> -- PMM
>>
>


Re: [Qemu-devel] [PATCH] migration: avoid copying ignore-shared ramblock when in incoming migration

2019-04-01 Thread Catherine Ho
Hi all,
I found an insterested issue here besides writting "dtb" rom into ram.
That is, should qemu support incoming from the ignore-shared memory backend
file repeatedly?
After I resolve the issue of writting "dtb" rom into ram, the incoming from
the ignore-shared memory backend file works fine at the *first* time, but
failed in the 2nd time.
It will report:
VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688
Failed to load virtio-scsi:virtio
error while loading state for instance 0x0 of device
'virtio-mmio@0a003e00/virtio-scsi'
load of migration failed: Operation not permitted

The root cause is the used idx is moved forward after 1st time incoming,
and in 2nd time incoming,
the last_avail_idx will be incorrectly restored from the saved device state
file(not in the ram).

I watched this even on x86 for a virtio-scsi disk

Any ideas for supporting 2nd time, 3rd time... incoming restoring?

B.R.
Catherine


On Wed, 27 Mar 2019 at 08:45, Catherine Ho  wrote:

> Hi all, thanks for the comments, I am digging into another potential
> error in the
> ignore-shared live migration. After that, I will send out the v2 asap
> B.R.
> Catherine
>
>
> On Mon, 25 Mar 2019 at 11:39, Peter Xu  wrote:
> >
> > On Fri, Mar 22, 2019 at 10:12:11AM +, Dr. David Alan Gilbert wrote:
> >
> > [...]
> >
> > > > In general, when we reset the system, we want to bring it
> > > > back to exactly the state that it was in when QEMU was
> > > > first started. That means we need to reload all the rom blob
> > > > data into memory (because the guest might have modified
> > > > that memory while it was running).
> > > >
> > > > If I understand correctly from other threads, the idea is
> > > > that some of the RAM is shared between source and destination
> > > > so it does not need to be manually copied during migration.
> > > > If that is correct, then perhaps the right thing is that
> > > > in the rom_reset code:
> > > >  * if this rom blob is being loaded into a "shared" ram block
> > > >  * and this reset is happening specifically before an
> > > >inbound migration
> > > >  * then skip loading the rom blob data
> > > >
> > > > ?
> > > >
> > > > I don't know the right way to determine either the "is this
> > > > a shared ram area" or "is this the reset prior to inbound
> > > > migration", but perhaps you can fill that in.
> > >
> > > Right, so in Catherine's patch there's a simple in_incoming_migration
> > > and checking ramblock_is_ignored;  it might be better to use the
> > > qemu_ram_is_shared() call and I wonder if we can just check for being
> in
> > > RUN_STATE_INMIGRATE - if that's early enough.
> >
> > Yes I feel like runstate_check(RUN_STATE_INMIGRATE) should be enough
> > to replace the new variable.  And I'm even thinking whether we need to
> > check qemu_ram_is_shared() at all... if rom_reset() simply refills the
> > ROM data into the RAMs, then IIUC that operation could be skipped for
> > all incoming migrations because all those ROM data (no matter they are
> > filled into shared RAM or not) should already have been filled on the
> > source side and:
> >
> > - if the ROM data's RAMBlock is shared backend, the latest data should
> >   already been there on the shared backend files, or,
> >
> > - if the ROM data's RAMBlock is not shared backend, they'll eventually
> >   be migrated to the destination later on after this rom_reset() on
> >   destination by the general RAM migration code.
> >
> > Regards,
> >
> > --
> > Peter Xu
>


Re: [Qemu-devel] [PATCH] migration: avoid copying ignore-shared ramblock when in incoming migration

2019-03-26 Thread Catherine Ho
Hi all, thanks for the comments, I am digging into another potential
error in the
ignore-shared live migration. After that, I will send out the v2 asap
B.R.
Catherine


On Mon, 25 Mar 2019 at 11:39, Peter Xu  wrote:
>
> On Fri, Mar 22, 2019 at 10:12:11AM +, Dr. David Alan Gilbert wrote:
>
> [...]
>
> > > In general, when we reset the system, we want to bring it
> > > back to exactly the state that it was in when QEMU was
> > > first started. That means we need to reload all the rom blob
> > > data into memory (because the guest might have modified
> > > that memory while it was running).
> > >
> > > If I understand correctly from other threads, the idea is
> > > that some of the RAM is shared between source and destination
> > > so it does not need to be manually copied during migration.
> > > If that is correct, then perhaps the right thing is that
> > > in the rom_reset code:
> > >  * if this rom blob is being loaded into a "shared" ram block
> > >  * and this reset is happening specifically before an
> > >inbound migration
> > >  * then skip loading the rom blob data
> > >
> > > ?
> > >
> > > I don't know the right way to determine either the "is this
> > > a shared ram area" or "is this the reset prior to inbound
> > > migration", but perhaps you can fill that in.
> >
> > Right, so in Catherine's patch there's a simple in_incoming_migration
> > and checking ramblock_is_ignored;  it might be better to use the
> > qemu_ram_is_shared() call and I wonder if we can just check for being in
> > RUN_STATE_INMIGRATE - if that's early enough.
>
> Yes I feel like runstate_check(RUN_STATE_INMIGRATE) should be enough
> to replace the new variable.  And I'm even thinking whether we need to
> check qemu_ram_is_shared() at all... if rom_reset() simply refills the
> ROM data into the RAMs, then IIUC that operation could be skipped for
> all incoming migrations because all those ROM data (no matter they are
> filled into shared RAM or not) should already have been filled on the
> source side and:
>
> - if the ROM data's RAMBlock is shared backend, the latest data should
>   already been there on the shared backend files, or,
>
> - if the ROM data's RAMBlock is not shared backend, they'll eventually
>   be migrated to the destination later on after this rom_reset() on
>   destination by the general RAM migration code.
>
> Regards,
>
> --
> Peter Xu



Re: [Qemu-devel] [PATCH] migration: avoid copying ignore-shared ramblock when in incoming migration

2019-03-21 Thread Catherine Ho
Thanks, Peter
See my comments below, please

On Thu, 21 Mar 2019 at 14:10, Peter Xu  wrote:
>
> On Wed, Mar 20, 2019 at 04:05:58PM +0800, chenxi He wrote:
> > On Wed, 20 Mar 2019 at 13:07, Peter Xu  wrote:
> > >
> > > On Tue, Mar 19, 2019 at 11:49:22AM -0400, Catherine Ho wrote:
> > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability")
> > > > addes a ignore-shared capability to bypass the shared ramblock (e,g,
> > > > membackend + numa node). It does good to live migration.
> > > >
> > > > This commit expected that QEMU doesn't write to guest RAM until
> > > > VM starts, but it does on aarch64 qemu:
> > > > Backtrace:
> > > > 1  0x55f4a296dd84 in address_space_write_rom_internal () at 
> > > > exec.c:3458
> > > > 2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
> > > > 3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
> > > > 4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
> > > > 5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
> > > > 6  0x55f4a2c9851d in main () at vl.c:4552
> > > >
> > > > In address_space_write_rom_internal, if the ramblock is RAM of
> > > > ramblock_is_ignored(), memcpy this ramblock will cause the 
> > > > memory-backend
> > > > file corruption.
> > > > But in normal case (!in_incoming_migration), this logic should be 
> > > > reserved.
> > >
> > > (I am sorry if these questions are silly...)
> > >
> > > Could I ask when a ROM will be changed during execution of the guest,
> > > and why?
> > Sure :),  as told by Peter
> > "as part of reset, we write the contents of ROM blobs, ELF files loaded 
> > through
> > -kernel, etc, to the guest memory."
>
> I see...
>
> I tried to dig on how x86 implemented the "-kernel" parameter and I
> noticed that it's dumping the kernel image into fw_cfg and probably
> that's also the reason why this should not be a problem for x86
> because rom reset on x86 won't overwrite the image.  Meanwhile, it
> seems totally different comparing to what has been done by ARM, which
> should probably be using rom_add_elf_program() to load the image if my
> reading is correct, so the kernel image is a ROM on ARM even if it can
> be written... Is my understanding correct?
>
> With that, I still feel that hacking into the memory write functions
> are tricky and I feel like it could bring hard to debug issues.  Would
> it be possible that we identify somehow that this ROM is a fake one
> (since it can actually be written) and we ignore the reset of it when
> proper (e.g., the initial reset on destination)?
I found that special rom block is "dtb"
See
commit 4c4bf654746eae5a042bde6c150d534d8849b762
Author: Ard Biesheuvel 
Date:   Fri Sep 12 14:06:50 2014 +0100

hw/arm/boot: load DTB as a ROM image

In order to make the device tree blob (DTB) available in memory not only at
first boot, but also after system reset, use rom_blob_add_fixed() to install
it into memory.

So in igore-shared case, dtb is not required here ?
Maybe I could add a flag in struct Rom to set it when the rom is created by
rom_add_blob_fixed_as()? And in rom_reset, just bypass this rom when
in_incoming_migration && ignore_shared

[+ Ard Biesheuvel]
Best Regard,
Catherine

>
> Thanks,

>
> --
> Peter Xu



[Qemu-devel] [PATCH] migration: avoid copying ignore-shared ramblock when in incoming migration

2019-03-19 Thread Catherine Ho
Commit 18269069c310 ("migration: Introduce ignore-shared capability") 
addes a ignore-shared capability to bypass the shared ramblock (e,g,
membackend + numa node). It does good to live migration.

This commit expected that QEMU doesn't write to guest RAM until 
VM starts, but it does on aarch64 qemu:
Backtrace:
1  0x55f4a296dd84 in address_space_write_rom_internal () at exec.c:3458
2  0x55f4a296de3a in address_space_write_rom () at exec.c:3479
3  0x55f4a2d519ff in rom_reset () at hw/core/loader.c:1101
4  0x55f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
5  0x55f4a2c90a28 in qemu_system_reset () at vl.c:1675
6  0x55f4a2c9851d in main () at vl.c:4552

In address_space_write_rom_internal, if the ramblock is RAM of 
ramblock_is_ignored(), memcpy this ramblock will cause the memory-backend
file corruption.
But in normal case (!in_incoming_migration), this logic should be reserved.

Fixes: commit 18269069c310 ("migration: Introduce ignore-shared capability") 
Signed-off-by: Catherine Ho 
Suggested-by: Yury Kotov 
---
 exec.c| 4 
 include/exec/cpu-common.h | 1 +
 include/qemu/option.h | 1 +
 migration/ram.c   | 2 +-
 vl.c  | 2 ++
 5 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/exec.c b/exec.c
index 86a38d3b3b..f08321fb7a 100644
--- a/exec.c
+++ b/exec.c
@@ -47,6 +47,7 @@
 #include "exec/address-spaces.h"
 #include "sysemu/xen-mapcache.h"
 #include "trace-root.h"
+#include "qemu/option.h"
 
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
 #include 
@@ -3455,6 +3456,9 @@ static inline MemTxResult 
address_space_write_rom_internal(AddressSpace *as,
 l = memory_access_size(mr, l, addr1);
 } else {
 /* ROM/RAM case */
+if (in_incoming_migration && ramblock_is_ignored(mr->ram_block)) {
+break;
+}
 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
 switch (type) {
 case WRITE_DATA:
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index cef8b88a2a..c80b7248a6 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -76,6 +76,7 @@ void *qemu_ram_get_host_addr(RAMBlock *rb);
 ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
 ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
 bool qemu_ram_is_shared(RAMBlock *rb);
+bool ramblock_is_ignored(RAMBlock *block);
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
 bool qemu_ram_is_migratable(RAMBlock *rb);
diff --git a/include/qemu/option.h b/include/qemu/option.h
index 844587cab3..7509435164 100644
--- a/include/qemu/option.h
+++ b/include/qemu/option.h
@@ -139,4 +139,5 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst, 
QemuOptsList *list);
 QDict *keyval_parse(const char *params, const char *implied_key,
 Error **errp);
 
+extern bool in_incoming_migration;
 #endif
diff --git a/migration/ram.c b/migration/ram.c
index 35bd6213e9..d6de9d335d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -159,7 +159,7 @@ out:
 return ret;
 }
 
-static bool ramblock_is_ignored(RAMBlock *block)
+bool ramblock_is_ignored(RAMBlock *block)
 {
 return !qemu_ram_is_migratable(block) ||
(migrate_ignore_shared() && qemu_ram_is_shared(block));
diff --git a/vl.c b/vl.c
index c1d5484e12..b79aec9ac3 100644
--- a/vl.c
+++ b/vl.c
@@ -145,6 +145,7 @@ bool enable_cpu_pm = false;
 int nb_nics;
 NICInfo nd_table[MAX_NICS];
 int autostart;
+bool in_incoming_migration;
 static enum {
 RTC_BASE_UTC,
 RTC_BASE_LOCALTIME,
@@ -3797,6 +3798,7 @@ int main(int argc, char **argv, char **envp)
 runstate_set(RUN_STATE_INMIGRATE);
 }
 incoming = optarg;
+in_incoming_migration = true;
 break;
 case QEMU_OPTION_only_migratable:
 /*
-- 
2.17.1




[Qemu-devel] [RFC PATCH] target/arm: Fix int128_make128 lo, hi order in paired_cmpxchg64_be

2019-01-31 Thread Catherine Ho
The lo,hi order is different from the comments. And in commit
1ec182c33379 ("target/arm: Convert to HAVE_CMPXCHG128"), it changes
the original code logic. So just restore the old code logic before this 
commit:
do_paired_cmpxchg64_be():
cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
newv = int128_make128(new_hi, new_lo);

Fixes: 1ec182c33379 ("target/arm: Convert to HAVE_CMPXCHG128")

Signed-off-by: Catherine Ho 
---
I didn't see any obvious real error case here, so set it as RFC

 target/arm/helper-a64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 101fa6d3ea..70850e564d 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -583,8 +583,8 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, 
uint64_t addr,
  * High and low need to be switched here because this is not actually a
  * 128bit store but two doublewords stored consecutively
  */
-Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
-Int128 newv = int128_make128(new_lo, new_hi);
+Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
+Int128 newv = int128_make128(new_hi, new_lo);
 Int128 oldv;
 uintptr_t ra = GETPC();
 uint64_t o0, o1;
-- 
2.17.1




[Qemu-devel] [PATCH] tcg: add early clober modifier in atomic16_cmpxchg on aarch64

2019-01-30 Thread Catherine Ho
Without this patch, gcc might up the Input/Output registers and
cause unpredictable error.

Fixes: 1ec182c33379 ("target/arm: Convert to HAVE_CMPXCHG128")

Signed-off-by: Catherine Ho 
---
 include/qemu/atomic128.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
index a6af22ff10..ddd0d55d31 100644
--- a/include/qemu/atomic128.h
+++ b/include/qemu/atomic128.h
@@ -68,7 +68,7 @@ static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 
cmp, Int128 new)
 "cbnz %w[tmp], 0b\n"
 "1:"
 : [mem] "+m"(*ptr), [tmp] "="(tmp),
-  [oldl] "="(oldl), [oldh] "=r"(oldh)
+  [oldl] "="(oldl), [oldh] "="(oldh)
 : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
   [newl] "r"(newl), [newh] "r"(newh)
 : "memory", "cc");
-- 
2.17.1




[Qemu-devel] [PATCH RESEND] tcg: add early clober modifier in atomic16_cmpxchg on aarch64

2019-01-30 Thread Catherine Ho
Without this patch, gcc might mess up the Input/Output registers and
cause unpredictable error.

Fixes: 1ec182c33379 ("target/arm: Convert to HAVE_CMPXCHG128")

Signed-off-by: Catherine Ho 
---
Resend: sent previous patch before subscribing maillist

 include/qemu/atomic128.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
index a6af22ff10..ddd0d55d31 100644
--- a/include/qemu/atomic128.h
+++ b/include/qemu/atomic128.h
@@ -68,7 +68,7 @@ static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 
cmp, Int128 new)
 "cbnz %w[tmp], 0b\n"
 "1:"
 : [mem] "+m"(*ptr), [tmp] "="(tmp),
-  [oldl] "="(oldl), [oldh] "=r"(oldh)
+  [oldl] "="(oldl), [oldh] "="(oldh)
 : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
   [newl] "r"(newl), [newh] "r"(newh)
 : "memory", "cc");
-- 
2.17.1